diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
index 01196e19afca..75897e2fde43 100644
--- a/Documentation/ABI/testing/sysfs-class-devfreq
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -7,6 +7,13 @@ Description:
 		The name of the devfreq object denoted as ... is the same as
 		the name of the device using devfreq.
 
+What:		/sys/class/devfreq/.../name
+Date:		November 2019
+Contact:	Chanwoo Choi
+Description:
+		The /sys/class/devfreq/.../name shows the name of the device
+		of the corresponding devfreq object.
+
 What:		/sys/class/devfreq/.../governor
 Date:		September 2011
 Contact:	MyungJoo Ham
diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
index f6344675e395..695a2ea1d1ae 100644
--- a/Documentation/admin-guide/device-mapper/dm-raid.rst
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
@@ -419,3 +419,5 @@ Version History
 	rebuild errors.
 1.15.0  Fix size extensions not being synchronized in case of new MD bitmap
         pages allocated; also fix those not occurring after previous reductions
+1.15.1  Fix argument count and arguments for rebuild/write_mostly/journal_(dev|mode)
+        on the status line.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ade4e6ec23e0..53bec4748ab8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -136,6 +136,10 @@
 			dynamic table installation which will install SSDT
 			tables to /sys/firmware/acpi/tables/dynamic.
 
+	acpi_no_watchdog	[HW,ACPI,WDT]
+			Ignore the ACPI-based watchdog interface (WDAT) and let
+			a native driver control the watchdog device instead.
+
 	acpi_rsdp=	[ACPI,EFI,KEXEC]
 			Pass the RSDP address to the kernel, mostly used
 			on machines running EFI runtime service to boot the
@@ -432,6 +436,11 @@
 			embedded devices based on command line input.
 			See Documentation/block/cmdline-partition.rst
 
+	bmq.timeslice=	[KNL] Time slice in us for BMQ scheduler.
+			Format: <int> (must be >= 1000)
+			Default: 4000
+			See Documentation/scheduler/sched-BMQ.txt
+
 	boot_delay=	Milliseconds to delay each printk during boot.
 			Values larger than 10 seconds (10000) are changed to
 			no delay (0).
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index def074807cee..e4bc9350f192 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -105,6 +105,7 @@ show up in /proc/sys/kernel:
 - unknown_nmi_panic
 - watchdog
 - watchdog_thresh
+- yield_type
 - version
 
@@ -1173,3 +1174,13 @@
 is 10 seconds.  The softlockup threshold is (2 * watchdog_thresh). Setting
 this tunable to zero will disable lockup detection altogether.
+
+yield_type:
+===========
+
+BMQ CPU scheduler only. This determines the type of yield that calls to
+sched_yield() will perform.
+
+  0 - No yield.
+  1 - Deboost and requeue task. (default)
+  2 - Set run queue skip task.
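For reference, the tunable above is switched at runtime like any other sysctl;
a minimal sketch (root required, values as documented above):

	# cat /proc/sys/kernel/yield_type
	# echo 0 > /proc/sys/kernel/yield_type	# no yield
	# sysctl -w kernel.yield_type=1		# default: deboost and requeue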
diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst
index d4a85d535bf9..4a9d9c794ee5 100644
--- a/Documentation/arm64/tagged-address-abi.rst
+++ b/Documentation/arm64/tagged-address-abi.rst
@@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how
 the user addresses are used by the kernel:
 
 1. User addresses not accessed by the kernel but used for address space
-   management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use
-   of valid tagged pointers in this context is always allowed.
+   management (e.g. ``mprotect()``, ``madvise()``). The use of valid
+   tagged pointers in this context is allowed with the exception of
+   ``brk()``, ``mmap()`` and the ``new_address`` argument to
+   ``mremap()`` as these have the potential to alias with existing
+   user addresses.
+
+   NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+   incorrectly accept valid tagged pointers for the ``brk()``,
+   ``mmap()`` and ``mremap()`` system calls.
 
 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
    relaxation is disabled by default and the application thread needs to
diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml b/Documentation/devicetree/bindings/arm/fsl.yaml
index f79683a628f0..1238e68ac1b4 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -139,7 +139,7 @@ properties:
         items:
           - enum:
               - armadeus,imx6dl-apf6      # APF6 (Solo) SoM
-              - armadeus,imx6dl-apf6dldev # APF6 (Solo) SoM on APF6Dev board
+              - armadeus,imx6dl-apf6dev   # APF6 (Solo) SoM on APF6Dev board
               - eckelmann,imx6dl-ci4x10
               - emtrion,emcon-mx6         # emCON-MX6S or emCON-MX6DL SoM
               - emtrion,emcon-mx6-avari   # emCON-MX6S or emCON-MX6DL SoM on Avari Base
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
index 6eb33207a167..5117ad68a584 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7606.yaml
@@ -82,7 +82,7 @@ properties:
       Must be the device tree identifier of the over-sampling
       mode pins. As the line is active high, it should be marked
       GPIO_ACTIVE_HIGH.
-    maxItems: 1
+    maxItems: 3
 
   adi,sw-mode:
     description:
@@ -125,9 +125,9 @@ examples:
         adi,conversion-start-gpios = <&gpio 17 GPIO_ACTIVE_HIGH>;
         reset-gpios = <&gpio 27 GPIO_ACTIVE_HIGH>;
         adi,first-data-gpios = <&gpio 22 GPIO_ACTIVE_HIGH>;
-        adi,oversampling-ratio-gpios = <&gpio 18 GPIO_ACTIVE_HIGH
-                                        &gpio 23 GPIO_ACTIVE_HIGH
-                                        &gpio 26 GPIO_ACTIVE_HIGH>;
+        adi,oversampling-ratio-gpios = <&gpio 18 GPIO_ACTIVE_HIGH>,
+                                       <&gpio 23 GPIO_ACTIVE_HIGH>,
+                                       <&gpio 26 GPIO_ACTIVE_HIGH>;
         standby-gpios = <&gpio 24 GPIO_ACTIVE_LOW>;
         adi,sw-mode;
       };
diff --git a/Documentation/devicetree/bindings/net/fsl-fman.txt b/Documentation/devicetree/bindings/net/fsl-fman.txt
index 250f8d8cdce4..c00fb0d22c7b 100644
--- a/Documentation/devicetree/bindings/net/fsl-fman.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fman.txt
@@ -110,6 +110,13 @@ PROPERTIES
 		Usage: required
 		Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt
 
+- fsl,erratum-a050385
+		Usage: optional
+		Value type: boolean
+		Definition: A boolean property. Indicates the presence of
+		erratum A050385, under which DMA transactions that are
+		split can result in a FMan lock.
+
 =============================================================================
 FMan MURAM Node
diff --git a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
index f5cdac8b2847..8b005192f6e8 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/qcom,spmi-regulator.txt
@@ -161,7 +161,7 @@ The regulator node houses sub-nodes for each regulator within the device. Each
 sub-node is identified using the node's name, with valid values listed for each
 of the PMICs below.
 
-pm8005:
+pm8004:
 	s2, s5
 
 pm8005:
diff --git a/Documentation/fb/fbcon.rst b/Documentation/fb/fbcon.rst
index ebca41785abe..65ba40255137 100644
--- a/Documentation/fb/fbcon.rst
+++ b/Documentation/fb/fbcon.rst
@@ -127,7 +127,7 @@ C. Boot options
 	is typically located on the same video card.  Thus, the consoles that
 	are controlled by the VGA console will be garbled.
 
-4. fbcon=rotate:
+5. fbcon=rotate:
 
 	This option changes the orientation angle of the console display. The
 	value 'n' accepts the following:
@@ -152,21 +152,21 @@ C. Boot options
 	Actually, the underlying fb driver is totally ignorant of console
 	rotation.
 
-5. fbcon=margin:
+6. fbcon=margin:
 
 	This option specifies the color of the margins. The margins are the
 	leftover area at the right and the bottom of the screen that are not
 	used by text. By default, this area will be black. The 'color' value
 	is an integer number that depends on the framebuffer driver being used.
 
-6. fbcon=nodefer
+7. fbcon=nodefer
 
 	If the kernel is compiled with deferred fbcon takeover support,
 	normally the framebuffer contents, left in place by the
 	firmware/bootloader, will be preserved until some text is actually
 	output to the console. This option causes fbcon to bind immediately
 	to the fbdev device.
 
-7. fbcon=logo-pos:
+8. fbcon=logo-pos:
 
 	The only possible 'location' is 'center' (without quotes), and when
 	given, the bootup logo is moved from the default top-left corner
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index f18506083ced..26c093969573 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -850,3 +850,11 @@ business doing so.
 d_alloc_pseudo() is internal-only; uses outside of alloc_file_pseudo() are
 very suspect (and won't work in modules).  Such uses are very likely to
 be misspelled d_alloc_anon().
+
+---
+
+**mandatory**
+
+[should've been added in 2016] stale comment in finish_open() notwithstanding,
+failure exits in ->atomic_open() instances should *NOT* fput() the file,
+no matter what.  Everything is handled by the caller.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 99ca040e3f90..d6bf97ee44bd 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -46,6 +46,7 @@ Table of Contents
  3.10	/proc/<pid>/timerslack_ns - Task timerslack value
  3.11	/proc/<pid>/patch_state - Livepatch patch operation state
  3.12	/proc/<pid>/arch_status - Task architecture specific information
+ 3.13	/proc/<pid>/ksm - Remote KSM
 
 4	Configuring procfs
  4.1	Mount options
@@ -2010,6 +2011,19 @@ x86 specific entries:
 the task is unlikely an AVX512 user, but depends on the workload and the
 scheduling scenario, it also could be a false negative mentioned above.
 
+3.13	/proc/<pid>/ksm - Remote KSM
+--------------------------------------------
+This write-only file allows marking the memory of another task for merging
+and unmerging via KSM.
+
+The following actions are available:
+
+  * mark the task's memory as mergeable:
+    # echo merge > /proc/<pid>/ksm
+
+  * unmerge all of the task's memory:
+    # echo unmerge > /proc/<pid>/ksm
+
 ------------------------------------------------------------------------------
 Configuring procfs
 ------------------------------------------------------------------------------
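As a rough end-to-end sketch of the interface above (assuming KSM is enabled
in the kernel; $pid is a placeholder for the target process, and the
/sys/kernel/mm/ksm knobs belong to the regular KSM interface):

	# echo 1 > /sys/kernel/mm/ksm/run		# start the KSM daemon
	# echo merge > /proc/$pid/ksm			# mark the task's memory mergeable
	# cat /sys/kernel/mm/ksm/pages_sharing		# observe deduplication progress
	# echo unmerge > /proc/$pid/ksm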
diff --git a/Documentation/hwmon/drivetemp.rst b/Documentation/hwmon/drivetemp.rst
new file mode 100644
index 000000000000..2d37d049247f
--- /dev/null
+++ b/Documentation/hwmon/drivetemp.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver drivetemp
+=======================
+
+
+References
+----------
+
+ANS T13/1699-D
+Information technology - AT Attachment 8 - ATA/ATAPI Command Set (ATA8-ACS)
+
+ANS Project T10/BSR INCITS 513
+Information technology - SCSI Primary Commands - 4 (SPC-4)
+
+ANS Project INCITS 557
+Information technology - SCSI / ATA Translation - 5 (SAT-5)
+
+
+Description
+-----------
+
+This driver supports reporting the temperature of disk and solid state
+drives with temperature sensors.
+
+If supported, it uses the ATA SCT Command Transport feature to read
+the current drive temperature and, if available, temperature limits
+as well as historic minimum and maximum temperatures. If SCT Command
+Transport is not supported, the driver uses SMART attributes to read
+the drive temperature.
+
+
+Sysfs entries
+-------------
+
+Only the temp1_input attribute is always available. Other attributes are
+available only if reported by the drive. All temperatures are reported in
+milli-degrees Celsius.
+
+======================= =====================================================
+temp1_input		Current drive temperature
+temp1_lcrit		Minimum temperature limit. Operating the device below
+			this temperature may cause physical damage to the
+			device.
+temp1_min		Minimum recommended continuous operating limit
+temp1_max		Maximum recommended continuous operating temperature
+temp1_crit		Maximum temperature limit. Operating the device above
+			this temperature may cause physical damage to the
+			device.
+temp1_lowest		Minimum temperature seen this power cycle
+temp1_highest		Maximum temperature seen this power cycle
+======================= =====================================================
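Since drivetemp registers as an ordinary hwmon device, the attributes above
surface under /sys/class/hwmon; a minimal sketch for locating and reading
them (hwmon numbering varies from system to system):

	for d in /sys/class/hwmon/hwmon*; do
		[ "$(cat "$d/name")" = "drivetemp" ] || continue
		echo "$d: $(cat "$d/temp1_input") millidegrees Celsius"
	done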
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 43cc605741ea..a730b2a619fa 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -47,6 +47,7 @@ Hardware Monitoring Kernel Drivers
    da9055
    dell-smm-hwmon
    dme1737
+   drivetemp
    ds1621
    ds620
    emc1403
diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst
index 69fa48ee93d6..e0b45a257f21 100644
--- a/Documentation/kbuild/modules.rst
+++ b/Documentation/kbuild/modules.rst
@@ -470,9 +470,9 @@ build.
 
 	The syntax of the Module.symvers file is::
 
-	<CRC>	<Symbol>	<Namespace>	<Module>	<Export Type>
+	<CRC>	<Symbol>	<Module>	<Export Type>	<Namespace>
 
-	0xe1cc2a05	usb_stor_suspend	USB_STORAGE	drivers/usb/storage/usb-storage	EXPORT_SYMBOL_GPL
+	0xe1cc2a05	usb_stor_suspend	drivers/usb/storage/usb-storage	EXPORT_SYMBOL_GPL	USB_STORAGE
 
 	The fields are separated by tabs and values may be empty (e.g.
 	if no namespace is defined for an exported symbol).
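Because the fields are tab-separated and the namespace is now the last
(possibly empty) column, a quick sketch for listing all namespaced exports
from a built kernel tree:

	awk -F'\t' '$5 != "" { print $2 " lives in namespace " $5 }' Module.symvers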
diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt
index ca2136c76042..0bf32d1121be 100644
--- a/Documentation/networking/nf_flowtable.txt
+++ b/Documentation/networking/nf_flowtable.txt
@@ -76,7 +76,7 @@ flowtable and add one rule to your forward chain.
 
         table inet x {
                 flowtable f {
-                        hook ingress priority 0 devices = { eth0, eth1 };
+                        hook ingress priority 0; devices = { eth0, eth1 };
                 }
                 chain y {
                         type filter hook forward priority 0; policy accept;
diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt
new file mode 100644
index 000000000000..05c84eec0f31
--- /dev/null
+++ b/Documentation/scheduler/sched-BMQ.txt
@@ -0,0 +1,110 @@
+                         BitMap queue CPU Scheduler
+                         --------------------------
+
+CONTENT
+========
+
+ Background
+ Design
+  Overview
+  Task policy
+  Priority management
+  BitMap Queue
+  CPU Assignment and Migration
+
+
+Background
+==========
+
+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution
+of the previous Priority and Deadline based Skiplist multiple queue scheduler
+(PDS), and is inspired by the Zircon scheduler. Its goal is to keep the
+scheduler code simple while staying efficient and scalable for interactive
+tasks, such as desktop use, movie playback and gaming.
+
+Design
+======
+
+Overview
+--------
+
+BMQ uses a per-CPU run queue design: each (logical) CPU has its own run
+queue and is responsible for scheduling the tasks placed into that queue.
+
+The run queue is a set of priority queues. Note that, as data structures,
+these queues are FIFO queues for non-rt tasks and priority queues for rt
+tasks; see BitMap Queue below for details. BMQ is optimized for non-rt tasks
+because most applications are non-rt tasks. Whether a queue is FIFO or
+priority, each queue is an ordered list of runnable tasks awaiting execution,
+and the data structures are the same. When it is time for a new task to run,
+the scheduler simply looks for the lowest numbered queue that contains a task
+and runs the first task from the head of that queue. The per-CPU idle task is
+also kept in the run queue, so the scheduler can always find a task to run.
+
+Each task is assigned the same timeslice (default 4ms) when it is picked to
+start running. A task is reinserted at the end of the appropriate priority
+queue when it uses up its whole timeslice. When the scheduler selects a new
+task from the priority queue, it sets the CPU's preemption timer for the
+remainder of the previous timeslice. When that timer fires, the scheduler
+stops execution of that task, selects another task and starts over again.
+
+If a task blocks waiting for a shared resource, it is taken out of its
+priority queue and placed in a wait queue for the shared resource. When it
+is unblocked, it is reinserted into the appropriate priority queue of an
+eligible CPU.
+
+Task policy
+-----------
+
+BMQ supports the DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policies,
+like the mainline CFS scheduler. But BMQ is heavily optimized for non-rt
+tasks, that is, NORMAL/BATCH/IDLE policy tasks. Below are the implementation
+details for each policy.
+
+DEADLINE
+	It is squashed into a priority 0 FIFO task.
+
+FIFO/RR
+	All RT tasks share one single priority queue in the BMQ run queue
+design. The complexity of the insert operation is O(n). BMQ is not designed
+for systems that run mostly rt policy tasks.
+
+NORMAL/BATCH/IDLE
+	BATCH and IDLE tasks are treated as the same policy. They compete for
+CPU with NORMAL policy tasks, but they just don't boost. To control the
+priority of NORMAL/BATCH/IDLE tasks, simply use nice levels.
+
+ISO
+	The ISO policy is not supported in BMQ. Please use a nice level -20
+NORMAL policy task instead.
+
+Priority management
+-------------------
+
+RT tasks have priorities from 0 to 99. For non-rt tasks, there are three
+different factors used to determine the effective priority of a task; the
+effective priority is what determines which queue the task will be in.
+
+The first factor is simply the task's static priority, which is assigned from
+the task's nice level: within [-20, 19] from userland's point of view and
+[0, 39] internally.
+
+The second factor is the priority boost. This is a value bounded within
+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority; it is
+modified in the following cases:
+
+*When a thread has used up its entire timeslice, always deboost it by
+increasing its boost value by one.
+*When a thread gives up CPU control (voluntarily or involuntarily) to
+reschedule, and its switch-in time (the time between its last switch-in and
+running) is below the threshold based on its priority boost, boost it by
+decreasing its boost value by one, capped at 0 (it won't go negative).
+
+The intent of this system is to ensure that interactive threads are serviced
+quickly. These are usually the threads that interact directly with the user
+and cause user-perceivable latency. These threads usually do little work and
+spend most of their time blocked awaiting another user event. So they get the
+priority boost from unblocking, while background threads that do most of the
+processing receive the priority penalty for using their entire timeslice.
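As the policy section above notes, the priority of NORMAL/BATCH/IDLE tasks is
steered purely by nice level and policy; a sketch using standard util-linux
tools (command names after the options are placeholders):

	$ nice -n 19 make -j"$(nproc)"	# lowest-priority NORMAL task
	$ chrt -b 0 ./batch_job		# SCHED_BATCH: competes like NORMAL, never boosts
	$ chrt -i 0 ./cleanup_task	# SCHED_IDLE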
diff --git a/Documentation/sphinx/parallel-wrapper.sh b/Documentation/sphinx/parallel-wrapper.sh
index 7daf5133bdd3..e54c44ce117d 100644
--- a/Documentation/sphinx/parallel-wrapper.sh
+++ b/Documentation/sphinx/parallel-wrapper.sh
@@ -30,4 +30,4 @@ if [ -n "$parallel" ] ; then
 	parallel="-j$parallel"
 fi
 
-exec "$sphinx" "$parallel" "$@"
+exec "$sphinx" $parallel "$@"
diff --git a/MAINTAINERS b/MAINTAINERS
index 56765f542244..8cebe538393b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8302,7 +8302,7 @@ M:	Joonas Lahtinen
 M:	Rodrigo Vivi
 L:	intel-gfx@lists.freedesktop.org
 W:	https://01.org/linuxgraphics/
-B:	https://01.org/linuxgraphics/documentation/how-report-bugs
+B:	https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
 C:	irc://chat.freenode.net/intel-gfx
 Q:	http://patchwork.freedesktop.org/project/intel-gfx/
 T:	git git://anongit.freedesktop.org/drm-intel
@@ -8808,8 +8808,10 @@ L:	isdn4linux@listserv.isdn4linux.de (subscribers-only)
 L:	netdev@vger.kernel.org
 W:	http://www.isdn4linux.de
 S:	Maintained
-F:	drivers/isdn/mISDN
-F:	drivers/isdn/hardware
+F:	drivers/isdn/mISDN/
+F:	drivers/isdn/hardware/
+F:	drivers/isdn/Kconfig
+F:	drivers/isdn/Makefile
 
 ISDN/CAPI SUBSYSTEM
 M:	Karsten Keil
@@ -17869,6 +17871,14 @@ L:	linux-gpio@vger.kernel.org
 S:	Maintained
 F:	drivers/gpio/gpio-ws16c48.c
 
+WIREGUARD SECURE NETWORK TUNNEL
+M:	Jason A.
Donenfeld +S: Maintained +F: drivers/net/wireguard/ +F: tools/testing/selftests/wireguard/ +L: wireguard@lists.zx2c4.com +L: netdev@vger.kernel.org + WISTRON LAPTOP BUTTON DRIVER M: Miloslav Trmac S: Maintained diff --git a/Makefile b/Makefile index 6a01b073915e..d0faa1237bda 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -pf8 NAME = Kleptomaniac Octopus # *DOCUMENTATION* @@ -1239,7 +1239,7 @@ ifneq ($(dtstree),) %.dtb: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@ -PHONY += dtbs dtbs_install dt_binding_check +PHONY += dtbs dtbs_install dtbs_check dtbs dtbs_check: include/config/kernel.release scripts_dtc $(Q)$(MAKE) $(build)=$(dtstree) @@ -1259,6 +1259,7 @@ PHONY += scripts_dtc scripts_dtc: scripts_basic $(Q)$(MAKE) $(build)=scripts/dtc +PHONY += dt_binding_check dt_binding_check: scripts_dtc $(Q)$(MAKE) $(build)=Documentation/devicetree/bindings @@ -1691,7 +1692,7 @@ PHONY += descend $(build-dirs) descend: $(build-dirs) $(build-dirs): prepare $(Q)$(MAKE) $(build)=$@ \ - single-build=$(if $(filter-out $@/, $(single-no-ko)),1) \ + single-build=$(if $(filter-out $@/, $(filter $@/%, $(single-no-ko))),1) \ need-builtin=1 need-modorder=1 clean-dirs := $(addprefix _clean_, $(clean-dirs)) diff --git a/arch/Kconfig b/arch/Kconfig index 48b5e103bdb0..5e907a954532 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -396,10 +396,10 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE config HAVE_RCU_TABLE_FREE bool -config HAVE_RCU_TABLE_NO_INVALIDATE +config HAVE_MMU_GATHER_PAGE_SIZE bool -config HAVE_MMU_GATHER_PAGE_SIZE +config MMU_GATHER_NO_RANGE bool config HAVE_MMU_GATHER_NO_GATHER diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index f9a5c9ddcae7..1d109b06e7d8 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -78,6 +78,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index d9ee43c6b7db..fe19f1d412e7 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -29,6 +29,8 @@ .endm #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 96dab76da3b3..7ef1916fcbf4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -74,7 +74,7 @@ config ARM select HAVE_CONTEXT_TRACKING select HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT - select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE @@ -1905,7 +1905,7 @@ config XIP_DEFLATED_DATA config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your diff --git a/arch/arm/Makefile b/arch/arm/Makefile index db857d07114f..1fc32b611f8a 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -307,13 +307,15 @@ endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare stack_protector_prepare: prepare0 - 
$(eval KBUILD_CFLAGS += \ + $(eval SSP_PLUGIN_CFLAGS := \ -fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \ awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\ include/generated/asm-offsets.h) \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\ include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) + $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) endif all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index a1e883c5e5c4..6da67789ac22 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -101,7 +101,6 @@ clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \ $(libfdt) $(libfdt_hdrs) hyp-stub.S KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -117,7 +116,8 @@ CFLAGS_fdt_ro.o := $(nossp_flags) CFLAGS_fdt_rw.o := $(nossp_flags) CFLAGS_fdt_wip.o := $(nossp_flags) -ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \ + -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index ead21e5f2b80..469a2b3b60c0 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -140,6 +140,17 @@ #endif .endm + .macro enable_cp15_barriers, reg + mrc p15, 0, \reg, c1, c0, 0 @ read SCTLR + tst \reg, #(1 << 5) @ CP15BEN bit set? + bne .L_\@ + orr \reg, \reg, #(1 << 5) @ CP15 barrier instructions + mcr p15, 0, \reg, c1, c0, 0 @ write SCTLR + ARM( .inst 0xf57ff06f @ v7+ isb ) + THUMB( isb ) +.L_\@: + .endm + .section ".start", "ax" /* * sort out different calling conventions @@ -820,6 +831,7 @@ __armv4_mmu_cache_on: mov pc, r12 __armv7_mmu_cache_on: + enable_cp15_barriers r11 mov r12, lr #ifdef CONFIG_MMU mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0 @@ -1209,6 +1221,7 @@ __armv6_mmu_cache_flush: mov pc, lr __armv7_mmu_cache_flush: + enable_cp15_barriers r10 tst r4, #1 bne iflush mrc p15, 0, r10, c0, c1, 5 @ read ID_MMFR1 diff --git a/arch/arm/boot/dts/am437x-idk-evm.dts b/arch/arm/boot/dts/am437x-idk-evm.dts index f3ced6df0c9b..9f66f96d09c9 100644 --- a/arch/arm/boot/dts/am437x-idk-evm.dts +++ b/arch/arm/boot/dts/am437x-idk-evm.dts @@ -526,11 +526,11 @@ * Supply voltage supervisor on board will not allow opp50 so * disable it and set opp100 as suspend OPP. 
*/ - opp50@300000000 { + opp50-300000000 { status = "disabled"; }; - opp100@600000000 { + opp100-600000000 { opp-suspend; }; }; diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi index 091356f2a8c1..c726cd8dbdf1 100644 --- a/arch/arm/boot/dts/am43xx-clocks.dtsi +++ b/arch/arm/boot/dts/am43xx-clocks.dtsi @@ -704,6 +704,60 @@ ti,bit-shift = <8>; reg = <0x2a48>; }; + + clkout1_osc_div_ck: clkout1-osc-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&sys_clkin_ck>; + ti,bit-shift = <20>; + ti,max-div = <4>; + reg = <0x4100>; + }; + + clkout1_src2_mux_ck: clkout1-src2-mux-ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&clk_rc32k_ck>, <&sysclk_div>, <&dpll_ddr_m2_ck>, + <&dpll_per_m2_ck>, <&dpll_disp_m2_ck>, + <&dpll_mpu_m2_ck>; + reg = <0x4100>; + }; + + clkout1_src2_pre_div_ck: clkout1-src2-pre-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&clkout1_src2_mux_ck>; + ti,bit-shift = <4>; + ti,max-div = <8>; + reg = <0x4100>; + }; + + clkout1_src2_post_div_ck: clkout1-src2-post-div-ck { + #clock-cells = <0>; + compatible = "ti,divider-clock"; + clocks = <&clkout1_src2_pre_div_ck>; + ti,bit-shift = <8>; + ti,max-div = <32>; + ti,index-power-of-two; + reg = <0x4100>; + }; + + clkout1_mux_ck: clkout1-mux-ck { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&clkout1_osc_div_ck>, <&clk_rc32k_ck>, + <&clkout1_src2_post_div_ck>, <&dpll_extdev_m2_ck>; + ti,bit-shift = <16>; + reg = <0x4100>; + }; + + clkout1_ck: clkout1-ck { + #clock-cells = <0>; + compatible = "ti,gate-clock"; + clocks = <&clkout1_mux_ck>; + ti,bit-shift = <23>; + reg = <0x4100>; + }; }; &prcm { diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi index dee9c0c8a096..16c6fd3c4246 100644 --- a/arch/arm/boot/dts/at91sam9260.dtsi +++ b/arch/arm/boot/dts/at91sam9260.dtsi @@ -187,7 +187,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - ; }; @@ -221,7 +221,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - ; }; @@ -239,7 +239,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - ; }; @@ -257,7 +257,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - ; }; @@ -275,7 +275,7 @@ uart0 { pinctrl_uart0: uart0-0 { atmel,pins = - ; }; }; @@ -283,7 +283,7 @@ uart1 { pinctrl_uart1: uart1-0 { atmel,pins = - ; }; }; diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index dba025a98527..5ed3d745ac86 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -329,7 +329,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - , + , ; }; @@ -347,7 +347,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - , + , ; }; @@ -365,7 +365,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - , + , ; }; diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi index 99678abdda93..5c990cfae254 100644 --- a/arch/arm/boot/dts/at91sam9263.dtsi +++ b/arch/arm/boot/dts/at91sam9263.dtsi @@ -183,7 +183,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - ; }; @@ -201,7 +201,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - ; }; @@ -219,7 +219,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - ; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 691c95ea6175..fd179097a4bf 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -556,7 +556,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - ; }; @@ 
-574,7 +574,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - ; }; @@ -592,7 +592,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - ; }; @@ -610,7 +610,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - ; }; diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi index 8643b7151565..ea024e4b6e09 100644 --- a/arch/arm/boot/dts/at91sam9rl.dtsi +++ b/arch/arm/boot/dts/at91sam9rl.dtsi @@ -682,7 +682,7 @@ usart0 { pinctrl_usart0: usart0-0 { atmel,pins = - , + , ; }; @@ -721,7 +721,7 @@ usart1 { pinctrl_usart1: usart1-0 { atmel,pins = - , + , ; }; @@ -744,7 +744,7 @@ usart2 { pinctrl_usart2: usart2-0 { atmel,pins = - , + , ; }; @@ -767,7 +767,7 @@ usart3 { pinctrl_usart3: usart3-0 { atmel,pins = - , + , ; }; diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index 1b5a835f66bd..b8c4b5bb265a 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -31,6 +31,8 @@ pwr { label = "PWR"; gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; }; }; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts index 66ab35eccba7..28be0332c1c8 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-a-plus.dts @@ -26,6 +26,8 @@ pwr { label = "PWR"; gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; }; }; }; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 74ed6d047807..37343148643d 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -27,6 +27,8 @@ pwr { label = "PWR"; gpios = <&expgpio 2 GPIO_ACTIVE_LOW>; + default-state = "keep"; + linux,default-trigger = "default-on"; }; }; diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 7e7aa101d8a4..912ee8778830 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3461,6 +3461,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER13_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; @@ -3489,6 +3490,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER14_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; @@ -3517,6 +3519,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; @@ -3545,6 +3548,7 @@ clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; clock-names = "fck"; interrupts = ; + ti,timer-pwm; }; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 73e5011f531a..c5af7530be7c 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -184,6 +184,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x20013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; @@ -238,6 +239,7 @@ device_type = "pci"; ranges = <0x81000000 0 0 0x03000 0 0x00010000 0x82000000 0 0x30013000 0x13000 0 0xffed000>; + dma-ranges = <0x02000000 0x0 0x00000000 0x00000000 0x1 0x00000000>; bus-range = <0x00 0xff>; #interrupt-cells = <1>; num-lanes = <1>; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index cdcba3f561c4..9f6fbe4c1fee 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi 
@@ -86,3 +86,8 @@ &usb4_tm { status = "disabled"; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index 93e1eb83bed9..d7d98d2069df 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -796,16 +796,6 @@ clock-div = <1>; }; - ipu1_gfclk_mux: ipu1_gfclk_mux@520 { - #clock-cells = <0>; - compatible = "ti,mux-clock"; - clocks = <&dpll_abe_m2x2_ck>, <&dpll_core_h22x2_ck>; - ti,bit-shift = <24>; - reg = <0x0520>; - assigned-clocks = <&ipu1_gfclk_mux>; - assigned-clock-parents = <&dpll_core_h22x2_ck>; - }; - dummy_ck: dummy_ck { #clock-cells = <0>; compatible = "fixed-clock"; @@ -1564,6 +1554,8 @@ compatible = "ti,clkctrl"; reg = <0x20 0x4>; #clock-cells = <2>; + assigned-clocks = <&ipu1_clkctrl DRA7_IPU1_MMU_IPU1_CLKCTRL 24>; + assigned-clock-parents = <&dpll_core_h22x2_ck>; }; ipu_clkctrl: ipu-clkctrl@50 { diff --git a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi index 6486df3e2942..881cea0b61ba 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-phycore-som.dtsi @@ -183,7 +183,6 @@ pinctrl-0 = <&pinctrl_usdhc4>; bus-width = <8>; non-removable; - vmmc-supply = <&vdd_emmc_1p8>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index a2a4f33a3e3e..cbafadbe86f4 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -629,7 +629,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; no-1-8-v; @@ -642,7 +642,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; no-1-8-v; @@ -776,6 +776,7 @@ &usbh1 { vbus-supply = <®_5p0v_main>; disable-over-current; + maximum-speed = "full-speed"; status = "okay"; }; @@ -1056,7 +1057,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1069,7 +1069,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index d05be3f0e2a7..04717cf69db0 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -336,7 +336,6 @@ assigned-clock-rates = <400000000>; bus-width = <8>; fsl,tuning-step = <2>; - max-frequency = <100000000>; vmmc-supply = <®_module_3v3>; vqmmc-supply = <®_DCDC3>; non-removable; diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi index d8acd7cc7918..497434f0629e 100644 --- a/arch/arm/boot/dts/imx7d.dtsi +++ b/arch/arm/boot/dts/imx7d.dtsi @@ -44,7 +44,7 @@ opp-hz = /bits/ 64 <792000000>; opp-microvolt = <1000000>; clock-latency-ns = <150000>; - opp-supported-hw = <0xd>, <0xf>; + opp-supported-hw = <0xd>, <0x7>; opp-suspend; }; @@ -52,7 +52,7 @@ opp-hz = /bits/ 64 <996000000>; opp-microvolt = <1100000>; clock-latency-ns = <150000>; - 
opp-supported-hw = <0xc>, <0xf>; + opp-supported-hw = <0xc>, <0x7>; opp-suspend; }; @@ -60,7 +60,7 @@ opp-hz = /bits/ 64 <1200000000>; opp-microvolt = <1225000>; clock-latency-ns = <150000>; - opp-supported-hw = <0x8>, <0xf>; + opp-supported-hw = <0x8>, <0x3>; opp-suspend; }; }; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 2f6977ada447..63d9f4a066e3 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -728,7 +728,7 @@ }; mdio0: mdio@2d24000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; @@ -737,7 +737,7 @@ }; mdio1: mdio@2d64000 { - compatible = "fsl,etsec2-mdio"; + compatible = "gianfar"; device_type = "mdio"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 3c534cd50ee3..db2033f674c6 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -129,8 +129,8 @@ gpu_opp_table: gpu-opp-table { compatible = "operating-points-v2"; - opp-182150000 { - opp-hz = /bits/ 64 <182150000>; + opp-182142857 { + opp-hz = /bits/ 64 <182142857>; opp-microvolt = <1150000>; }; opp-318750000 { diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 099bf8e711c9..1e8c5d7bc824 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -125,8 +125,8 @@ opp-hz = /bits/ 64 <255000000>; opp-microvolt = <1100000>; }; - opp-364300000 { - opp-hz = /bits/ 64 <364300000>; + opp-364285714 { + opp-hz = /bits/ 64 <364285714>; opp-microvolt = <1100000>; }; opp-425000000 { diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index ebf5b7cfe215..63341635bddf 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -68,6 +68,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = ; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; diff --git a/arch/arm/boot/dts/rk3188-bqedison2qc.dts b/arch/arm/boot/dts/rk3188-bqedison2qc.dts index c8b62bbd6a4a..ad1afd403052 100644 --- a/arch/arm/boot/dts/rk3188-bqedison2qc.dts +++ b/arch/arm/boot/dts/rk3188-bqedison2qc.dts @@ -466,9 +466,12 @@ pinctrl-names = "default"; pinctrl-0 = <&sd1_clk>, <&sd1_cmd>, <&sd1_bus4>; vmmcq-supply = <&vccio_wl>; + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio3>; interrupts = ; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index f770aace0efd..203d40be70a5 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1188,49 +1188,49 @@ usart0_clk: usart0_clk { #clock-cells = <0>; reg = <12>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart1_clk: usart1_clk { #clock-cells = <0>; reg = <13>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart2_clk: usart2_clk { #clock-cells = <0>; reg = <14>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart3_clk: usart3_clk { #clock-cells = <0>; reg = <15>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range 
= <0 83000000>; }; twi0_clk: twi0_clk { reg = <18>; #clock-cells = <0>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi1_clk: twi1_clk { #clock-cells = <0>; reg = <19>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi2_clk: twi2_clk { #clock-cells = <0>; reg = <20>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; mci0_clk: mci0_clk { @@ -1246,19 +1246,19 @@ spi0_clk: spi0_clk { #clock-cells = <0>; reg = <24>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; spi1_clk: spi1_clk { #clock-cells = <0>; reg = <25>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; tcb0_clk: tcb0_clk { #clock-cells = <0>; reg = <26>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; pwm_clk: pwm_clk { @@ -1269,7 +1269,7 @@ adc_clk: adc_clk { #clock-cells = <0>; reg = <29>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; dma0_clk: dma0_clk { @@ -1300,13 +1300,13 @@ ssc0_clk: ssc0_clk { #clock-cells = <0>; reg = <38>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; ssc1_clk: ssc1_clk { #clock-cells = <0>; reg = <39>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; sha_clk: sha_clk { diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi index cf06a018ed0f..2470dd3fff25 100644 --- a/arch/arm/boot/dts/sama5d3_can.dtsi +++ b/arch/arm/boot/dts/sama5d3_can.dtsi @@ -36,13 +36,13 @@ can0_clk: can0_clk { #clock-cells = <0>; reg = <40>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; can1_clk: can1_clk { #clock-cells = <0>; reg = <41>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_tcb1.dtsi b/arch/arm/boot/dts/sama5d3_tcb1.dtsi index 1584035daf51..215802b8db30 100644 --- a/arch/arm/boot/dts/sama5d3_tcb1.dtsi +++ b/arch/arm/boot/dts/sama5d3_tcb1.dtsi @@ -22,6 +22,7 @@ tcb1_clk: tcb1_clk { #clock-cells = <0>; reg = <27>; + atmel,clk-output-range = <0 166000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_uart.dtsi b/arch/arm/boot/dts/sama5d3_uart.dtsi index 4316bdbdc25d..cb62adbd28ed 100644 --- a/arch/arm/boot/dts/sama5d3_uart.dtsi +++ b/arch/arm/boot/dts/sama5d3_uart.dtsi @@ -41,13 +41,13 @@ uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart1_clk: uart1_clk { #clock-cells = <0>; reg = <17>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/stihxxx-b2120.dtsi b/arch/arm/boot/dts/stihxxx-b2120.dtsi index 60e11045ad76..d051f080e52e 100644 --- a/arch/arm/boot/dts/stihxxx-b2120.dtsi +++ b/arch/arm/boot/dts/stihxxx-b2120.dtsi @@ -46,7 +46,7 @@ /* DAC */ format = "i2s"; mclk-fs = <256>; - frame-inversion = <1>; + frame-inversion; cpu { sound-dai = <&sti_uni_player2>; }; diff --git a/arch/arm/boot/dts/stm32f469-disco.dts b/arch/arm/boot/dts/stm32f469-disco.dts index f3ce477b7bae..9397db0c43de 100644 --- a/arch/arm/boot/dts/stm32f469-disco.dts +++ b/arch/arm/boot/dts/stm32f469-disco.dts @@ -76,6 +76,13 @@ regulator-max-microvolt = <3300000>; }; + vdd_dsi: vdd-dsi { + compatible = "regulator-fixed"; + regulator-name = "vdd_dsi"; + regulator-min-microvolt = <3300000>; + 
regulator-max-microvolt = <3300000>; + }; + soc { dma-ranges = <0xc0000000 0x0 0x10000000>; }; @@ -155,6 +162,7 @@ compatible = "orisetech,otm8009a"; reg = <0>; /* dsi virtual channel (0..3) */ reset-gpios = <&gpioh 7 GPIO_ACTIVE_LOW>; + power-supply = <&vdd_dsi>; status = "okay"; port { diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi index fe773c72a69b..b4f1673df9ee 100644 --- a/arch/arm/boot/dts/sun8i-h3.dtsi +++ b/arch/arm/boot/dts/sun8i-h3.dtsi @@ -80,7 +80,7 @@ #cooling-cells = <2>; }; - cpu@1 { + cpu1: cpu@1 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <1>; @@ -90,7 +90,7 @@ #cooling-cells = <2>; }; - cpu@2 { + cpu2: cpu@2 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <2>; @@ -100,7 +100,7 @@ #cooling-cells = <2>; }; - cpu@3 { + cpu3: cpu@3 { compatible = "arm,cortex-a7"; device_type = "cpu"; reg = <3>; @@ -111,6 +111,15 @@ }; }; + pmu { + compatible = "arm,cortex-a7-pmu"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + timer { compatible = "arm,armv7-timer"; interrupts = , diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 519ff58e67b3..0afcae9f7cf8 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -178,6 +178,7 @@ CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_FUNCTION_PROFILER=y CONFIG_TEST_KSTRTOX=y +CONFIG_DEBUG_FS=y CONFIG_KGDB=y CONFIG_KGDB_KDB=y CONFIG_STRICT_DEVMEM=y diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index fde84f123fbb..ead8348ec999 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -38,6 +38,7 @@ CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_CHACHA20_NEON=m +CONFIG_KALLSYMS_ALL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y @@ -92,6 +93,7 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y @@ -291,6 +293,7 @@ CONFIG_CROS_EC_SPI=y CONFIG_COMMON_CLK_MAX77686=y CONFIG_COMMON_CLK_S2MPS11=y CONFIG_EXYNOS_IOMMU=y +CONFIG_PM_DEVFREQ=y CONFIG_DEVFREQ_GOV_PERFORMANCE=y CONFIG_DEVFREQ_GOV_POWERSAVE=y CONFIG_DEVFREQ_GOV_USERSPACE=y @@ -356,4 +359,7 @@ CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_DETECT_HUNG_TASK is not set CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_USER=y diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 6ebbb2b241d2..6fdb0ac62b3d 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skcipher_request *req, if (nbytes < walk.total) nbytes = round_down(nbytes, walk.stride); - if (!neon) { + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, nbytes, state, ctx->nrounds); state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); @@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_request *req, bool neon) chacha_init_generic(state, ctx->key, req->iv); - if (!neon) { + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { hchacha_block_arm(state, subctx.key, ctx->nrounds); } else { kernel_neon_begin(); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 9b118516d2db..08d9805f613b 100644 --- 
a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -14,13 +14,25 @@ #include /* arm64 compatibility macros */ +#define PSR_AA32_MODE_FIQ FIQ_MODE +#define PSR_AA32_MODE_SVC SVC_MODE #define PSR_AA32_MODE_ABT ABT_MODE #define PSR_AA32_MODE_UND UND_MODE #define PSR_AA32_T_BIT PSR_T_BIT +#define PSR_AA32_F_BIT PSR_F_BIT #define PSR_AA32_I_BIT PSR_I_BIT #define PSR_AA32_A_BIT PSR_A_BIT #define PSR_AA32_E_BIT PSR_E_BIT #define PSR_AA32_IT_MASK PSR_IT_MASK +#define PSR_AA32_GE_MASK 0x000f0000 +#define PSR_AA32_DIT_BIT 0x00200000 +#define PSR_AA32_PAN_BIT 0x00400000 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_Q_BIT PSR_Q_BIT +#define PSR_AA32_V_BIT PSR_V_BIT +#define PSR_AA32_C_BIT PSR_C_BIT +#define PSR_AA32_Z_BIT PSR_Z_BIT +#define PSR_AA32_N_BIT PSR_N_BIT unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num); @@ -41,6 +53,11 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) *__vcpu_spsr(vcpu) = v; } +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + return spsr; +} + static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu, u8 reg_num) { @@ -182,6 +199,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 7c0eddb0adb2..32fbf82e3ebc 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -14,6 +14,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm/include/asm/vdso/vsyscall.h b/arch/arm/include/asm/vdso/vsyscall.h index c4166f317071..cff87d8d30da 100644 --- a/arch/arm/include/asm/vdso/vsyscall.h +++ b/arch/arm/include/asm/vdso/vsyscall.h @@ -34,9 +34,9 @@ struct vdso_data *__arm_get_k_vdso_data(void) #define __arch_get_k_vdso_data __arm_get_k_vdso_data static __always_inline -int __arm_update_vdso_data(void) +bool __arm_update_vdso_data(void) { - return !cntvct_ok; + return cntvct_ok; } #define __arch_update_vdso_data __arm_update_vdso_data diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index c89ac1b9d28b..e0330a25e1c6 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -94,6 +94,8 @@ static bool __init cntvct_functional(void) * this. 
*/ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 95b2e1ce559c..f8016e3db65d 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -118,7 +118,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index d5af6aedc02c..52665f30d236 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -691,6 +691,12 @@ static void __init at91_pm_use_default_mode(int pm_mode) soc_pm.data.suspend_mode = AT91_PM_ULP0; } +static const struct of_device_id atmel_shdwc_ids[] = { + { .compatible = "atmel,sama5d2-shdwc" }, + { .compatible = "microchip,sam9x60-shdwc" }, + { /* sentinel. */ } +}; + static void __init at91_pm_modes_init(void) { struct device_node *np; @@ -700,7 +706,7 @@ static void __init at91_pm_modes_init(void) !at91_is_pm_mode_active(AT91_PM_ULP1)) return; - np = of_find_compatible_node(NULL, NULL, "atmel,sama5d2-shdwc"); + np = of_find_matching_node(NULL, atmel_shdwc_ids); if (!np) { pr_warn("%s: failed to find shdwc!\n", __func__); goto ulp1_default; @@ -751,6 +757,7 @@ static const struct of_device_id atmel_pmc_ids[] __initconst = { { .compatible = "atmel,sama5d3-pmc", .data = &pmc_infos[1] }, { .compatible = "atmel,sama5d4-pmc", .data = &pmc_infos[1] }, { .compatible = "atmel,sama5d2-pmc", .data = &pmc_infos[1] }, + { .compatible = "microchip,sam9x60-pmc", .data = &pmc_infos[1] }, { /* sentinel */ }, }; diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 35ff620537e6..03506ce46149 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -91,6 +91,8 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index 912aeceb4ff8..5aa5796cff0e 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -109,17 +109,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 000000000000..5bd1ba7ef15b --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. 
+ */ + +#include +#include +#include +#include +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 062391ff13da..1eabf2d2834b 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -327,17 +327,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig index 880bc2a5cada..7f7002dc2b21 100644 --- a/arch/arm/mach-npcm/Kconfig +++ b/arch/arm/mach-npcm/Kconfig @@ -11,7 +11,7 @@ config ARCH_NPCM7XX depends on ARCH_MULTI_V7 select PINCTRL_NPCM7XX select NPCM7XX_TIMER - select ARCH_REQUIRE_GPIOLIB + select GPIOLIB select CACHE_L2X0 select ARM_GIC select HAVE_ARM_TWD if SMP diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index 3341a12bbb9c..c796a97194ac 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -370,6 +370,14 @@ _pll_m_c_x_done: pll_locked r1, r0, CLK_RESET_PLLC_BASE pll_locked r1, r0, CLK_RESET_PLLX_BASE + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 + beq 1f + ldr r1, [r0, #CLK_RESET_PLLP_BASE] + bic r1, r1, #(1<<31) @ disable PllP bypass + str r1, [r0, #CLK_RESET_PLLP_BASE] +1: + mov32 r7, TEGRA_TMRUS_BASE ldr r1, [r7] add r1, r1, #LOCK_DELAY @@ -630,7 +638,10 @@ tegra30_switch_cpu_to_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 ldr r0, [r5, #CLK_RESET_PLLP_BASE] + orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster bic r0, r0, #(1 << 30) str r0, [r5, #CLK_RESET_PLLP_BASE] ldr r0, [r5, #CLK_RESET_PLLA_BASE] diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e822af0d9219..9414d72f664b 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -221,7 +221,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn - 1, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. 
This diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 3ef204137e73..054be44d1cdb 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -324,7 +324,7 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static inline void +static inline void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 1f012c506434..cd3414898d10 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -16,7 +16,7 @@ OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index e92c4de5bf3b..7c775a918a4e 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -54,21 +54,21 @@ enable-method = "psci"; }; - cpu@1 { + cpu1: cpu@1 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <1>; enable-method = "psci"; }; - cpu@2 { + cpu2: cpu@2 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <2>; enable-method = "psci"; }; - cpu@3 { + cpu3: cpu@3 { compatible = "arm,cortex-a53"; device_type = "cpu"; reg = <3>; @@ -76,6 +76,16 @@ }; }; + pmu { + compatible = "arm,cortex-a53-pmu", + "arm,armv8-pmuv3"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 29824081b43b..24ffe2dcbddb 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -70,6 +70,16 @@ clock-output-names = "ext_osc32k"; }; + pmu { + compatible = "arm,cortex-a53-pmu", + "arm,armv8-pmuv3"; + interrupts = , + , + , + ; + interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>; + }; + psci { compatible = "arm,psci-0.2"; method = "smc"; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts index f82f25c1a5f9..d5dc12878dfe 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxm-khadas-vim2.dts @@ -327,7 +327,7 @@ #size-cells = <0>; bus-width = <4>; - max-frequency = <50000000>; + max-frequency = <60000000>; non-removable; disable-wp; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index a8bb3fa9fec9..cb1b48f5b8b1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -593,6 +593,7 @@ compatible = "brcm,bcm43438-bt"; interrupt-parent = <&gpio_intc>; interrupts = <95 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "host-wakeup"; shutdown-gpios = <&gpio GPIOX_17 GPIO_ACTIVE_HIGH>; max-speed = <2000000>; clocks = <&wifi32k>; diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 62ab0d54ff71..335fff762451 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -161,10 +161,10 @@ bus-range = <0x0 0x1>; reg = <0x0 0x40000000 0x0 0x10000000>; ranges = <0x2000000 0x0 0x50000000 0x0 0x50000000 0x0 0x10000000>; - interrupt-map = <0 0 0 1 &gic GIC_SPI 168 
IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 2 &gic GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 3 &gic GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, - <0 0 0 4 &gic GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; + interrupt-map = <0 0 0 1 &gic 0 0 GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gic 0 0 GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gic 0 0 GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gic 0 0 GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; interrupt-map-mask = <0x0 0x0 0x0 0x7>; msi-map = <0x0 &its 0x0 0x10000>; iommu-map = <0x0 &smmu 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi index 6082ae022136..d237162a8744 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043-post.dtsi @@ -20,6 +20,8 @@ }; &fman0 { + fsl,erratum-a050385; + /* these aliases provide the FMan ports mapping */ enet0: ethernet@e0000 { }; diff --git a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts index d3d26cca7d52..13460a360c6a 100644 --- a/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts +++ b/arch/arm64/boot/dts/freescale/imx8qxp-mek.dts @@ -52,11 +52,6 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0>; }; - - ethphy1: ethernet-phy@1 { - compatible = "ethernet-phy-ieee802.3-c22"; - reg = <1>; - }; }; }; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index d43e1299c8ef..b47f2ce160a4 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -102,7 +102,7 @@ }; gmac0: ethernet@ff800000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff800000 0x2000>; interrupts = <0 90 4>; interrupt-names = "macirq"; @@ -117,7 +117,7 @@ }; gmac1: ethernet@ff802000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff802000 0x2000>; interrupts = <0 91 4>; interrupt-names = "macirq"; @@ -132,7 +132,7 @@ }; gmac2: ethernet@ff804000 { - compatible = "altr,socfpga-stmmac", "snps,dwmac-3.74a", "snps,dwmac"; + compatible = "altr,socfpga-stmmac-a10-s10", "snps,dwmac-3.74a", "snps,dwmac"; reg = <0xff804000 0x2000>; interrupts = <0 92 4>; interrupt-names = "macirq"; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts index bd4aab6092e0..e31813a4f972 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts @@ -143,6 +143,7 @@ phy-mode = "sgmii"; status = "okay"; managed = "in-band-status"; + phys = <&comphy1 0>; sfp = <&sfp_eth0>; }; @@ -150,11 +151,14 @@ phy-mode = "sgmii"; status = "okay"; managed = "in-band-status"; + phys = <&comphy0 1>; sfp = <&sfp_eth1>; }; &usb3 { status = "okay"; + phys = <&usb2_utmi_otg_phy>; + phy-names = "usb2-utmi-otg-phy"; }; &uart0 { diff --git a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts index bd881497b872..a211a046b2f2 100644 --- a/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts +++ b/arch/arm64/boot/dts/marvell/armada-8040-clearfog-gt-8k.dts @@ -408,6 +408,8 @@ reg = <5>; label = "cpu"; ethernet = <&cp1_eth2>; + phy-mode = "2500base-x"; + managed = "in-band-status"; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 4ca2e7b44559..1eed3c41521a 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -1602,6 +1602,8 @@ interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -1632,6 +1634,8 @@ interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi index 5f101a20a20a..e08fcb426bbf 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi @@ -9,6 +9,7 @@ / { aliases { serial0 = &blsp2_uart1; + serial1 = &blsp1_uart3; }; chosen { diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index fc7838ea9a01..385b46686194 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -987,7 +987,7 @@ tcsr_mutex_regs: syscon@1f40000 { compatible = "syscon"; - reg = <0x01f40000 0x20000>; + reg = <0x01f40000 0x40000>; }; tlmm: pinctrl@3400000 { diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index 501a7330dbc8..522d3ef72df5 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -73,6 +73,7 @@ regulator-always-on; regulator-boot-on; regulator-name = "vdd_apc"; + regulator-initial-mode = <1>; regulator-min-microvolt = <1048000>; regulator-max-microvolt = <1384000>; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index d100f46791a6..912ba745c0fc 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -529,6 +529,8 @@ vdd-1.8-xo-supply = <&vreg_l7a_1p8>; vdd-1.3-rfa-supply = <&vreg_l17a_1p3>; vdd-3.3-ch0-supply = <&vreg_l25a_3p3>; + + qcom,snoc-host-cap-8bit-quirk; }; /* PINCTRL - additions to nodes defined in sdm845.dtsi */ diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index b38f9d442fc0..e6d700f8c194 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -636,7 +636,6 @@ /* audio_clkout0/1/2/3 */ #clock-cells = <1>; clock-frequency = <12288000 11289600>; - clkout-lr-synchronous; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 8812b70f3911..5acd5ce714d4 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -826,7 +826,7 @@ interrupts = ; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -841,7 +841,7 @@ interrupts = ; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -856,7 +856,7 @@ interrupts = ; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = 
"biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3308.dtsi b/arch/arm64/boot/dts/rockchip/rk3308.dtsi index 8bdc66c62975..fa0d55f1a587 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3308.dtsi @@ -591,7 +591,7 @@ bus-width = <4>; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; @@ -606,7 +606,7 @@ bus-width = <8>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; status = "disabled"; @@ -619,7 +619,7 @@ bus-width = <4>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu-drv", "ciu-sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; max-frequency = <150000000>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts index c706db0ee9ec..76f5db696009 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-firefly.dts @@ -669,9 +669,12 @@ vqmmc-supply = &vcc1v8_s3; /* IO line */ vmmc-supply = &vcc_sdio; /* card's power */ + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio0>; interrupts = ; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 4944d78a0a1c..e87a04477440 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -654,9 +654,12 @@ sd-uhs-sdr104; vqmmc-supply = <&vcc1v8_s3>; vmmc-supply = <&vccio_sd>; + #address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio0>; interrupts = ; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts index 2a127985ab17..d3ed8e5e770f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopc-t4.dts @@ -94,33 +94,6 @@ }; }; -&gpu_thermal { - trips { - gpu_warm: gpu_warm { - temperature = <55000>; - hysteresis = <2000>; - type = "active"; - }; - - gpu_hot: gpu_hot { - temperature = <65000>; - hysteresis = <2000>; - type = "active"; - }; - }; - cooling-maps { - map1 { - trip = <&gpu_warm>; - cooling-device = <&fan THERMAL_NO_LIMIT 1>; - }; - - map2 { - trip = <&gpu_hot>; - cooling-device = <&fan 2 THERMAL_NO_LIMIT>; - }; - }; -}; - &pinctrl { ir { ir_rx: ir-rx { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts index 0541dfce924d..9c659f3115c8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-orangepi.dts @@ -648,9 +648,12 @@ pinctrl-names = "default"; pinctrl-0 = <&sdio0_bus4 &sdio0_cmd &sdio0_clk>; sd-uhs-sdr104; + 
#address-cells = <1>; + #size-cells = <0>; status = "okay"; brcmf: wifi@1 { + reg = <1>; compatible = "brcm,bcm4329-fmac"; interrupt-parent = <&gpio0>; interrupts = ; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 1e4c2b78d66d..68d478af7a3e 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -43,6 +43,7 @@ smmu0: smmu@36600000 { compatible = "arm,smmu-v3"; reg = <0x0 0x36600000 0x0 0x100000>; + power-domains = <&k3_pds 229 TI_SCI_PD_EXCLUSIVE>; interrupt-parent = <&gic500>; interrupts = , ; diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 522cf004ce65..196aedd0c20c 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -259,7 +259,7 @@ static int ghash_setkey(struct crypto_shash *tfm, static struct shash_alg ghash_alg[] = {{ .base.cra_name = "ghash", .base.cra_driver_name = "ghash-neon", - .base.cra_priority = 100, + .base.cra_priority = 150, .base.cra_blocksize = GHASH_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct ghash_key), .base.cra_module = THIS_MODULE, diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index b9f8d787eea9..324e7d5ab37e 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -35,13 +35,16 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -62,15 +65,14 @@ static inline void apply_alternatives_module(void *start, size_t length) { } * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -78,17 +80,25 @@ static inline void apply_alternatives_module(void *start, size_t length) { } ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) 
\ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 574808b9df4c..da3280f639cd 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -14,6 +14,7 @@ static inline void __lse_atomic_##op(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %w[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -30,6 +31,7 @@ ATOMIC_OP(add, stadd) static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %w[i], %w[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -58,6 +60,7 @@ static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -77,6 +80,7 @@ ATOMIC_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic_and(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -87,6 +91,7 @@ static inline void __lse_atomic_and(int i, atomic_t *v) static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -106,6 +111,7 @@ ATOMIC_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic_sub(int i, atomic_t *v) { asm volatile( + __LSE_PREAMBLE " neg %w[i], %w[i]\n" " stadd %w[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -118,6 +124,7 @@ static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \ u32 tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[tmp], %[v]\n" \ " add %w[i], %w[i], %w[tmp]" \ @@ -139,6 +146,7 @@ ATOMIC_OP_SUB_RETURN( , al, "memory") static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -159,6 +167,7 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") static inline void __lse_atomic64_##op(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op " %[i], %[v]\n" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v)); \ @@ -175,6 +184,7 @@ ATOMIC64_OP(add, stadd) static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\ { \ asm volatile( \ + __LSE_PREAMBLE \ " " #asm_op #mb " %[i], %[i], %[v]" \ : [i] "+r" (i), [v] "+Q" (v->counter) \ : "r" (v) \ @@ -203,6 +213,7 @@ static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ : [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \ @@ -222,6 +233,7 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") static inline void __lse_atomic64_and(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " mvn %[i], %[i]\n" " stclr %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -232,6 +244,7 @@ static inline void __lse_atomic64_and(s64 i, 
atomic64_t *v) static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -251,6 +264,7 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") static inline void __lse_atomic64_sub(s64 i, atomic64_t *v) { asm volatile( + __LSE_PREAMBLE " neg %[i], %[i]\n" " stadd %[i], %[v]" : [i] "+&r" (i), [v] "+Q" (v->counter) @@ -263,6 +277,7 @@ static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v) \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %x[tmp], %[v]\n" \ " add %[i], %[i], %x[tmp]" \ @@ -284,6 +299,7 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ asm volatile( \ + __LSE_PREAMBLE \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]" \ : [i] "+&r" (i), [v] "+Q" (v->counter) \ @@ -305,6 +321,7 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile( + __LSE_PREAMBLE "1: ldr %x[tmp], %[v]\n" " subs %[ret], %x[tmp], #1\n" " b.lt 2f\n" @@ -332,6 +349,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ + __LSE_PREAMBLE \ " mov %" #w "[tmp], %" #w "[old]\n" \ " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ " mov %" #w "[ret], %" #w "[tmp]" \ @@ -379,6 +397,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ \ asm volatile( \ + __LSE_PREAMBLE \ " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index b0d53a265f1d..935d2aa231bf 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -4,6 +4,9 @@ */ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H + +#include + #ifdef CONFIG_COMPAT /* @@ -13,8 +16,6 @@ #include #include -#include - #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ #define COMPAT_UTS_MACHINE "armv8b\0\0" @@ -113,23 +114,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. 
- */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - #define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) #define COMPAT_MINSIGSTKSZ 2048 diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 72acd2db167f..ec213b4a1650 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -38,7 +38,7 @@ static inline void local_daif_mask(void) trace_hardirqs_off(); } -static inline unsigned long local_daif_save(void) +static inline unsigned long local_daif_save_flags(void) { unsigned long flags; @@ -50,6 +50,15 @@ static inline unsigned long local_daif_save(void) flags |= PSR_I_BIT; } + return flags; +} + +static inline unsigned long local_daif_save(void) +{ + unsigned long flags; + + flags = local_daif_save_flags(); + local_daif_mask(); return flags; diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5efe5ca8fecf..53ea7637b7b2 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -219,6 +219,38 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } +/* + * The layout of SPSR for an AArch32 state is different when observed from an + * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 + * view given an AArch64 view. + * + * In ARM DDI 0487E.a see: + * + * - The AArch64 view (SPSR_EL2) in section C5.2.18, page C5-426 + * - The AArch32 view (SPSR_abt) in section G8.2.126, page G8-6256 + * - The AArch32 view (SPSR_und) in section G8.2.132, page G8-6280 + * + * Which show the following differences: + * + * | Bit | AA64 | AA32 | Notes | + * +-----+------+------+-----------------------------| + * | 24 | DIT | J | J is RES0 in ARMv8 | + * | 21 | SS | DIT | SS doesn't exist in AArch32 | + * + * ... and all other bits are (currently) common. + */ +static inline unsigned long host_spsr_to_spsr32(unsigned long spsr) +{ + const unsigned long overlap = BIT(24) | BIT(21); + unsigned long dit = !!(spsr & PSR_AA32_DIT_BIT); + + spsr &= ~overlap; + + spsr |= dit << 21; + + return spsr; +} + static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu) { u32 mode; @@ -283,6 +315,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 02b5c48fd467..b204501a0c39 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -10,13 +10,11 @@ #include #include -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. 
- */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Width of the register accessed by the faulting instruction is 64 bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 80b388278149..5de132100b6d 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -6,6 +6,8 @@ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) +#define __LSE_PREAMBLE ".arch_extension lse\n" + #include #include #include @@ -14,8 +16,6 @@ #include #include -__asm__(".arch_extension lse"); - extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -34,7 +34,7 @@ static inline bool system_uses_lse_atomics(void) /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + ALTERNATIVE(llsc, __LSE_PREAMBLE lse, ARM64_HAS_LSE_ATOMICS) #else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index a4f9ca5479b0..4d94676e5a8b 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -213,7 +213,7 @@ static inline unsigned long kaslr_offset(void) ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #define untagged_addr(addr) ({ \ - u64 __addr = (__force u64)addr; \ + u64 __addr = (__force u64)(addr); \ __addr &= __untagged_addr(__addr); \ (__force __typeof__(addr))__addr; \ }) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index fbebb411ae20..bf57308fcd63 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -62,6 +62,7 @@ #define PSR_AA32_I_BIT 0x00000080 #define PSR_AA32_A_BIT 0x00000100 #define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_PAN_BIT 0x00400000 #define PSR_AA32_SSBS_BIT 0x00800000 #define PSR_AA32_DIT_BIT 0x01000000 #define PSR_AA32_Q_BIT 0x08000000 diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 5af82587909e..8c1b73dc8f55 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -25,8 +25,8 @@ #define __NR_compat_gettimeofday 78 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 -#define __NR_compat_clock_getres 247 #define __NR_compat_clock_gettime 263 +#define __NR_compat_clock_getres 264 #define __NR_compat_clock_gettime64 403 #define __NR_compat_clock_getres_time64 406 diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 7ed9294e2004..d1bb5b69f1ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -49,6 +49,7 @@ #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 +#define PSR_DIT_BIT 0x01000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 #define PSR_Z_BIT 0x40000000 diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 3a58e9db5cfe..a100483b47c4 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -274,7 +274,7 @@ int apei_claim_sea(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) return err; - current_flags = arch_local_save_flags(); + current_flags = local_daif_save_flags(); /* * SEA can interrupt SError, mask it and describe this as an NMI so diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 04cf64e9f0c9..32eff833a43c
100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -32,9 +32,7 @@ static unsigned long elf_hwcap __read_mostly; #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -1368,7 +1366,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1596,6 +1594,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .match_list = list, \ } +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ + } + #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { @@ -1669,8 +1673,35 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. 
So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7c6a0a41676f..d54d165b286a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -653,6 +653,7 @@ el0_sync: mov x0, sp bl el0_sync_handler b ret_to_user +ENDPROC(el0_sync) #ifdef CONFIG_COMPAT .align 6 @@ -661,16 +662,18 @@ el0_sync_compat: mov x0, sp bl el0_sync_compat_handler b ret_to_user -ENDPROC(el0_sync) +ENDPROC(el0_sync_compat) .align 6 el0_irq_compat: kernel_entry 0, 32 b el0_irq_naked +ENDPROC(el0_irq_compat) el0_error_compat: kernel_entry 0, 32 b el0_error_naked +ENDPROC(el0_error_compat) #endif .align 6 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3eb338f14386..94289d126993 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -269,6 +269,7 @@ static void sve_free(struct task_struct *task) */ static void task_fpsimd_load(void) { + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -289,6 +290,7 @@ static void fpsimd_save(void) this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -1092,6 +1094,7 @@ void fpsimd_bind_task_to_cpu(void) struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; last->sve_vl = current->thread.sve_vl; @@ -1114,6 +1117,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); + WARN_ON(!system_supports_fpsimd()); WARN_ON(!in_softirq() && !irqs_disabled()); last->st = st; @@ -1128,8 +1132,19 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state, */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). 
+ */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } get_cpu_fpsimd_context(); @@ -1148,7 +1163,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct user_fpsimd_state const *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; get_cpu_fpsimd_context(); @@ -1179,7 +1194,13 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) void fpsimd_flush_task_state(struct task_struct *t) { t->thread.fpsimd_cpu = NR_CPUS; - + /* + * If we don't support fpsimd, bail out after we have + * reset the fpsimd_cpu for this task and clear the + * FPSTATE. + */ + if (!system_supports_fpsimd()) + return; barrier(); set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE); @@ -1193,6 +1214,7 @@ void fpsimd_flush_task_state(struct task_struct *t) */ static void fpsimd_flush_cpu_state(void) { + WARN_ON(!system_supports_fpsimd()); __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -1203,6 +1225,8 @@ static void fpsimd_flush_cpu_state(void) */ void fpsimd_save_and_flush_cpu_state(void) { + if (!system_supports_fpsimd()) + return; WARN_ON(preemptible()); __get_cpu_fpsimd_context(); fpsimd_save(); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index d54586d5b031..fab013c5ee8c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -466,6 +466,13 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. 
*/ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || test_tsk_thread_flag(next, TIF_SSBD)) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6771c399d40c..cd6e5fa48b9c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -615,6 +615,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -637,6 +644,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -676,6 +686,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, { int ret; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = __fpr_set(target, regset, pos, count, kbuf, ubuf, 0); if (ret) return ret; @@ -1134,6 +1147,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -1348,6 +1362,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; if (target == current) @@ -1381,6 +1398,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.uw.fpsimd_state; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1438,6 +1458,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d4ed9a19d8fe..5407bf5d98ac 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -958,11 +958,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + void smp_send_stop(void) { unsigned long timeout; - if (num_online_cpus() > 1) { + if (num_other_online_cpus()) { cpumask_t mask; cpumask_copy(&mask, cpu_online_mask); @@ -975,10 +986,10 @@ void smp_send_stop(void) /* Wait up to one second for other CPUs to stop */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (num_other_online_cpus() && timeout--) udelay(1); - if (num_online_cpus() > 1) + if (num_other_online_cpus()) pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(cpu_online_mask)); @@ -1001,7 +1012,11 @@ void crash_smp_send_stop(void) cpus_stopped = 1; - if (num_online_cpus() == 1) { + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. 
+ */ + if (num_other_online_cpus() == 0) { sdei_mask_local_cpu(); return; } @@ -1009,7 +1024,7 @@ void crash_smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_CRASH_STOP); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 43487f035385..7a7e425616b5 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -101,7 +101,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -197,6 +197,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; + /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); } diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 72fbbd86eb5e..e5816d885761 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -28,7 +28,15 @@ /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { - if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + /* + * When the system doesn't support FP/SIMD, we cannot rely on + * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an + * abort on the very first access to FP and thus we should never + * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always + * trap the accesses. + */ + if (!system_supports_fpsimd() || + vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index ccdb6a051ab2..6aafc2825c1c 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,9 +14,6 @@ #include #include -#define PSTATE_FAULT_BITS_64 (PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | \ - PSR_I_BIT | PSR_D_BIT) - #define CURRENT_EL_SP_EL0_VECTOR 0x0 #define CURRENT_EL_SP_ELx_VECTOR 0x200 #define LOWER_EL_AArch64_VECTOR 0x400 @@ -50,6 +47,69 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; } +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. 
+ */ +static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +{ + unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. + + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= PSR_MODE_EL1h; + + return new; +} + static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); @@ -59,7 +119,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -94,7 +154,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); vcpu_write_spsr(vcpu, cpsr); /* diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index dab1fea4752a..a4f48c1ac28c 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -13,52 +13,46 @@ #include /* - * The LSB of the random hyp VA tag or 0 if no randomization is used. + * The LSB of the HYP VA tag */ static u8 tag_lsb; /* - * The random hyp VA tag value with the region bit if hyp randomization is used + * The HYP VA tag value with the region bit */ static u64 tag_val; static u64 va_mask; +/* + * We want to generate a hyp VA with the following format (with V == + * vabits_actual): + * + * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 + * --------------------------------------------------------- + * | 0000000 | hyp_va_msb | random tag | kern linear VA | + * |--------- tag_val -----------|----- va_mask ---| + * + * which does not conflict with the idmap regions. + */ __init void kvm_compute_layout(void) { phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); u64 hyp_va_msb; - int kva_msb; /* Where is my RAM region? 
*/ hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); hyp_va_msb ^= BIT(vabits_actual - 1); - kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ + tag_lsb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (vabits_actual - 1)) { - /* - * No space in the address, let's compute the mask so - * that it covers (vabits_actual - 1) bits, and the region - * bit. The tag stays set to zero. - */ - va_mask = BIT(vabits_actual - 1) - 1; - va_mask |= hyp_va_msb; - } else { - /* - * We do have some free bits to insert a random tag. - * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == vabits_actual): - * - * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 0 - * --------------------------------------------------------- - * | 0000000 | hyp_va_msb | random tag | kern linear VA | - */ - tag_lsb = kva_msb; - va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); - tag_val |= hyp_va_msb; - tag_val >>= tag_lsb; + va_mask = GENMASK_ULL(tag_lsb - 1, 0); + tag_val = hyp_va_msb; + + if (tag_lsb != (vabits_actual - 1)) { + /* We have some free bits to insert a random tag. */ + tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } + tag_val >>= tag_lsb; } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -117,11 +111,11 @@ void __init kvm_update_va_mask(struct alt_instr *alt, * VHE doesn't need any address translation, let's NOP * everything. * - * Alternatively, if we don't have any spare bits in - * the address, NOP everything after masking that - * kernel VA. + * Alternatively, if the tag is zero (because the layout + * dictates it and we don't have any spare bits in the + * address), NOP everything after masking the kernel VA. */ - if (has_vhe() || (!tag_lsb && i > 0)) { + if (has_vhe() || (!tag_val && i > 0)) { updptr[i] = cpu_to_le32(aarch64_insn_gen_nop()); continue; } diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index da09c884cc30..f00b394988a2 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -37,6 +37,7 @@ config CSKY select GX6605S_TIMER if CPU_CK610 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_AUDITSYSCALL + select HAVE_COPY_THREAD_TLS select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -75,7 +76,7 @@ config CPU_HAS_TLBI config CPU_HAS_LDSTEX bool help - For SMP, CPU needs "ldex&stex" instrcutions to atomic operations. + For SMP, CPU needs "ldex&stex" instructions for atomic operations. 
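The CPU_HAS_LDSTEX help text above refers to load-exclusive/store-exclusive pairs. A rough generic-C illustration of the retry loop such instructions implement, with C11 atomics standing in for the real ldex/stex assembly:

#include <stdatomic.h>

/* Sketch: an atomic add built from a compare-and-swap retry loop, the
 * same shape a ldex/stex (load-/store-exclusive) pair gives in assembly:
 * load-exclusive, modify, store-exclusive, retry if the store failed. */
static int atomic_add_return_sketch(_Atomic int *v, int i)
{
        int old = atomic_load_explicit(v, memory_order_relaxed);  /* "ldex" */
        int new;

        do {
                new = old + i;
                /* On failure, old is refreshed with the current value. */
        } while (!atomic_compare_exchange_weak(v, &old, new));    /* "stex" */

        return new;
}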
config CPU_NEED_TLBSYNC bool diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index 7ab78bd0f3b1..f35a9f3315ee 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -16,14 +16,16 @@ #define LSAVE_A4 40 #define LSAVE_A5 44 +#define usp ss1 + .macro USPTOKSP - mtcr sp, ss1 + mtcr sp, usp mfcr sp, ss0 .endm .macro KSPTOUSP mtcr sp, ss0 - mfcr sp, ss1 + mfcr sp, usp .endm .macro SAVE_ALL epc_inc @@ -45,7 +47,13 @@ add lr, r13 stw lr, (sp, 8) + mov lr, sp + addi lr, 32 + addi lr, 32 + addi lr, 16 + bt 2f mfcr lr, ss1 +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -79,9 +87,10 @@ ldw a0, (sp, 12) mtcr a0, epsr btsti a0, 31 + bt 1f ldw a0, (sp, 16) mtcr a0, ss1 - +1: ldw a0, (sp, 24) ldw a1, (sp, 28) ldw a2, (sp, 32) @@ -102,9 +111,9 @@ addi sp, 32 addi sp, 8 - bt 1f + bt 2f KSPTOUSP -1: +2: rte .endm diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index 9897a16b45e5..94a7a58765df 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -31,7 +31,13 @@ mfcr lr, epsr stw lr, (sp, 12) + btsti lr, 31 + bf 1f + addi lr, sp, 152 + br 2f +1: mfcr lr, usp +2: stw lr, (sp, 16) stw a0, (sp, 20) @@ -64,8 +70,10 @@ mtcr a0, epc ldw a0, (sp, 12) mtcr a0, epsr + btsti a0, 31 ldw a0, (sp, 16) mtcr a0, usp + mtcr a0, ss0 #ifdef CONFIG_CPU_HAS_HILO ldw a0, (sp, 140) @@ -86,6 +94,9 @@ addi sp, 40 ldm r16-r30, (sp) addi sp, 72 + bf 1f + mfcr sp, ss0 +1: rte .endm diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h index 211c983c7282..ba4018929733 100644 --- a/arch/csky/include/uapi/asm/unistd.h +++ b/arch/csky/include/uapi/asm/unistd.h @@ -1,7 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
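The abiv1/abiv2 entry.h hunks above make saving and restoring of the user stack pointer conditional on bit 31 of the saved PSR (the S bit, set when the trap was taken from kernel mode), so a nested kernel-mode trap no longer clobbers the usp recorded by the outer one. A much-simplified C rendering of that control flow; every name here is illustrative, the real logic is the assembly macros above:

struct frame { unsigned long epc, epsr, usp; };

/* '(epsr >> 31) & 1' stands in for the btsti test of PSR bit 31, and
 * '*usp' for the ss1/usp control register. */
static void trap_entry(struct frame *f, unsigned long epsr,
                       unsigned long *usp, unsigned long kernel_sp)
{
        f->epsr = epsr;
        /* Only a trap from user space has a meaningful usp to save; for a
         * trap from kernel mode record the pre-trap kernel sp instead. */
        f->usp = (epsr >> 31) & 1 ? kernel_sp : *usp;
}

static void trap_exit(const struct frame *f, unsigned long *usp)
{
        /* Restoring usp on a kernel-to-kernel return would clobber the
         * value saved by the outer trap; only restore it for user mode. */
        if (!((f->epsr >> 31) & 1))
                *usp = f->usp;
}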
+#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS #include diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S index 5b84f11485ae..3821ef9b7567 100644 --- a/arch/csky/kernel/atomic.S +++ b/arch/csky/kernel/atomic.S @@ -17,10 +17,12 @@ ENTRY(csky_cmpxchg) mfcr a3, epc addi a3, TRAP0_SIZE - subi sp, 8 + subi sp, 16 stw a3, (sp, 0) mfcr a3, epsr stw a3, (sp, 4) + mfcr a3, usp + stw a3, (sp, 8) psrset ee #ifdef CONFIG_CPU_HAS_LDSTEX @@ -47,7 +49,9 @@ ENTRY(csky_cmpxchg) mtcr a3, epc ldw a3, (sp, 4) mtcr a3, epsr - addi sp, 8 + ldw a3, (sp, 8) + mtcr a3, usp + addi sp, 16 KSPTOUSP rte END(csky_cmpxchg) diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f320d9248a22..397962e11bd1 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -34,10 +34,11 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sw->r15; } -int copy_thread(unsigned long clone_flags, +int copy_thread_tls(unsigned long clone_flags, unsigned long usp, unsigned long kthread_arg, - struct task_struct *p) + struct task_struct *p, + unsigned long tls) { struct switch_stack *childstack; struct pt_regs *childregs = task_pt_regs(p); @@ -64,7 +65,7 @@ int copy_thread(unsigned long clone_flags, childregs->usp = usp; if (clone_flags & CLONE_SETTLS) task_thread_info(p)->tp_value = childregs->tls - = childregs->regs[0]; + = tls; childregs->a0 = 0; childstack->r15 = (unsigned long) ret_from_fork; diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index b753d382e4ce..0bb0954d5570 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -120,7 +120,7 @@ void __init setup_smp_ipi(void) int rc; if (ipi_irq == 0) - panic("%s IRQ mapping failed\n", __func__); + return; rc = request_percpu_irq(ipi_irq, handle_ipi, "IPI Interrupt", &ipi_dummy_dev); diff --git a/arch/csky/mm/Makefile b/arch/csky/mm/Makefile index c94ef6481098..efb7ebab342b 100644 --- a/arch/csky/mm/Makefile +++ b/arch/csky/mm/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only ifeq ($(CONFIG_CPU_HAS_CACHEV2),y) obj-y += cachev2.o +CFLAGS_REMOVE_cachev2.o = $(CC_FLAGS_FTRACE) else obj-y += cachev1.o +CFLAGS_REMOVE_cachev1.o = $(CC_FLAGS_FTRACE) endif obj-y += dma-mapping.o diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index d4c2292ea46b..00e96278b377 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa69..dcba53803fa5 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index f03fdc95143e..4b1d3ba3a8a2 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -17,7 +17,7 @@ quiet_cmd_ls3_llsc = 
LLSCCHK $@ cmd_ls3_llsc = $(CMD_LS3_LLSC) $@ CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/boot/Makefile b/arch/mips/boot/Makefile index 528bd73d530a..4ed45ade32a1 100644 --- a/arch/mips/boot/Makefile +++ b/arch/mips/boot/Makefile @@ -123,7 +123,7 @@ $(obj)/vmlinux.its.S: $(addprefix $(srctree)/arch/mips/$(PLATFORM)/,$(ITS_INPUTS targets += vmlinux.its targets += vmlinux.gz.its targets += vmlinux.bz2.its -targets += vmlinux.lzmo.its +targets += vmlinux.lzma.its targets += vmlinux.lzo.its quiet_cmd_cpp_its_S = ITS $@ diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 37b93166bf22..c340f947baa0 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -4,6 +4,8 @@ #include "jz4780.dtsi" #include #include +#include +#include / { compatible = "img,ci20", "ingenic,jz4780"; @@ -163,63 +165,71 @@ regulators { vddcore: SUDCDC1 { - regulator-name = "VDDCORE"; + regulator-name = "DCDC_REG1"; regulator-min-microvolt = <1100000>; regulator-max-microvolt = <1100000>; regulator-always-on; }; vddmem: SUDCDC2 { - regulator-name = "VDDMEM"; + regulator-name = "DCDC_REG2"; regulator-min-microvolt = <1500000>; regulator-max-microvolt = <1500000>; regulator-always-on; }; vcc_33: SUDCDC3 { - regulator-name = "VCC33"; + regulator-name = "DCDC_REG3"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; regulator-always-on; }; vcc_50: SUDCDC4 { - regulator-name = "VCC50"; + regulator-name = "SUDCDC_REG4"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; regulator-always-on; }; vcc_25: LDO_REG5 { - regulator-name = "VCC25"; + regulator-name = "LDO_REG5"; regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; regulator-always-on; }; wifi_io: LDO_REG6 { - regulator-name = "WIFIIO"; + regulator-name = "LDO_REG6"; regulator-min-microvolt = <2500000>; regulator-max-microvolt = <2500000>; regulator-always-on; }; vcc_28: LDO_REG7 { - regulator-name = "VCC28"; + regulator-name = "LDO_REG7"; regulator-min-microvolt = <2800000>; regulator-max-microvolt = <2800000>; regulator-always-on; }; vcc_15: LDO_REG8 { - regulator-name = "VCC15"; + regulator-name = "LDO_REG8"; regulator-min-microvolt = <1500000>; regulator-max-microvolt = <1500000>; regulator-always-on; }; - vcc_18: LDO_REG9 { - regulator-name = "VCC18"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + vrtc_18: LDO_REG9 { + regulator-name = "LDO_REG9"; + /* Despite the datasheet stating 3.3V + * for REG9 and the driver expecting that, + * REG9 outputs 1.8V. + * Likely the CI20 uses a proprietary + * factory programmed chip variant. + * Since this is a simple on/off LDO the + * exact values do not matter. 
+ */ + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; vcc_11: LDO_REG10 { - regulator-name = "VCC11"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1100000>; + regulator-name = "LDO_REG10"; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; regulator-always-on; }; }; @@ -261,7 +271,9 @@ rtc@51 { compatible = "nxp,pcf8563"; reg = <0x51>; - interrupts = <110>; + + interrupt-parent = <&gpf>; + interrupts = <30 IRQ_TYPE_LEVEL_LOW>; }; }; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index 5accda2767be..a3301bab9231 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include / { #address-cells = <1>; @@ -45,14 +46,6 @@ #clock-cells = <1>; }; - watchdog: watchdog@10002000 { - compatible = "ingenic,jz4740-watchdog"; - reg = <0x10002000 0x10>; - - clocks = <&cgu JZ4740_CLK_RTC>; - clock-names = "rtc"; - }; - tcu: timer@10002000 { compatible = "ingenic,jz4740-tcu", "simple-mfd"; reg = <0x10002000 0x1000>; @@ -73,6 +66,14 @@ interrupt-parent = <&intc>; interrupts = <23 22 21>; + + watchdog: watchdog@0 { + compatible = "ingenic,jz4740-watchdog"; + reg = <0x0 0xc>; + + clocks = <&tcu TCU_CLK_WDT>; + clock-names = "wdt"; + }; }; rtc_dev: rtc@10003000 { diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index f928329b034b..bb89653d16a3 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include / { @@ -67,6 +68,14 @@ interrupt-parent = <&intc>; interrupts = <27 26 25>; + + watchdog: watchdog@0 { + compatible = "ingenic,jz4780-watchdog"; + reg = <0x0 0xc>; + + clocks = <&tcu TCU_CLK_WDT>; + clock-names = "wdt"; + }; }; rtc_dev: rtc@10003000 { @@ -348,14 +357,6 @@ status = "disabled"; }; - watchdog: watchdog@10002000 { - compatible = "ingenic,jz4780-watchdog"; - reg = <0x10002000 0x10>; - - clocks = <&cgu JZ4780_CLK_RTCLK>; - clock-names = "rtc"; - }; - nemc: nemc@13410000 { compatible = "ingenic,jz4780-nemc"; reg = <0x13410000 0x10000>; diff --git a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts index aa5caaa31104..aad9a8a8669b 100644 --- a/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts +++ b/arch/mips/boot/dts/ralink/gardena_smart_gateway_mt7688.dts @@ -177,6 +177,9 @@ pinctrl-names = "default"; pinctrl-0 = <&pinmux_i2s_gpio>; /* GPIO0..3 */ + fifo-size = <8>; + tx-threshold = <8>; + rts-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; cts-gpios = <&gpio 2 GPIO_ACTIVE_LOW>; }; diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index c99166eadbde..255afcdd79c9 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -100,24 +100,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. 
- */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - /* cast to a __user pointer via "unsigned long" makes sparse happy */ - return (void __user *)(unsigned long)(long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = (struct pt_regs *) diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 02783e141c32..fef0fda8f82f 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -37,6 +37,7 @@ static __inline__ long local_add_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_add_return \n" " addu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -52,6 +53,7 @@ static __inline__ long local_add_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_add_return \n" " addu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -84,6 +86,7 @@ static __inline__ long local_sub_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set arch=r4000 \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_sub_return \n" " subu %0, %1, %3 \n" __SC "%0, %2 \n" @@ -99,6 +102,7 @@ static __inline__ long local_sub_return(long i, local_t * l) __asm__ __volatile__( " .set push \n" " .set "MIPS_ISA_ARCH_LEVEL" \n" + __SYNC(full, loongson3_war) " \n" "1:" __LL "%1, %2 # local_sub_return \n" " subu %0, %1, %3 \n" __SC "%0, %2 \n" diff --git a/arch/mips/include/asm/sync.h b/arch/mips/include/asm/sync.h index 7c6a1095f556..aabd097933fe 100644 --- a/arch/mips/include/asm/sync.h +++ b/arch/mips/include/asm/sync.h @@ -155,9 +155,11 @@ * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use * optimized memory barrier primitives."). Here we specify that the affected * sync instructions should be emitted twice. + * Note that this expression is evaluated by the assembler (not the compiler), + * and that the assembler evaluates '==' as 0 or -1, not 0 or 1. */ #ifdef CONFIG_CPU_CAVIUM_OCTEON -# define __SYNC_rpt(type) (1 + (type == __SYNC_wmb)) +# define __SYNC_rpt(type) (1 - (type == __SYNC_wmb)) #else # define __SYNC_rpt(type) 1 #endif diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index c3d4212b5f1d..d8d2c379a3a8 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -606,7 +606,8 @@ static void __init bootcmdline_init(char **cmdline_p) * If we're configured to take boot arguments from DT, look for those * now. 
*/ - if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)) + if (IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB) || + IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)) of_scan_flat_dt(bootcmdline_scan_chosen, &dt_bootargs); #endif diff --git a/arch/mips/kernel/syscalls/Makefile b/arch/mips/kernel/syscalls/Makefile index a3d4bec695c6..6efb2f6889a7 100644 --- a/arch/mips/kernel/syscalls/Makefile +++ b/arch/mips/kernel/syscalls/Makefile @@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR $@ '$(syshdr_pfx_$(basetarget))' \ '$(syshdr_offset_$(basetarget))' -quiet_cmd_sysnr = SYSNR $@ +quiet_cmd_sysnr = SYSNR $@ cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \ '$(sysnr_abis_$(basetarget))' \ '$(sysnr_pfx_$(basetarget))' \ diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 6176b9acba95..d0d832ab3d3b 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } diff --git a/arch/mips/loongson64/platform.c b/arch/mips/loongson64/platform.c index 13f3404f0030..9674ae1361a8 100644 --- a/arch/mips/loongson64/platform.c +++ b/arch/mips/loongson64/platform.c @@ -27,6 +27,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c index d46655b914f1..c2ffcb920250 100644 --- a/arch/mips/sgi-ip30/ip30-irq.c +++ b/arch/mips/sgi-ip30/ip30-irq.c @@ -232,9 +232,10 @@ static void heart_domain_free(struct irq_domain *domain, return; irqd = irq_domain_get_irq_data(domain, virq); - clear_bit(irqd->hwirq, heart_irq_map); - if (irqd && irqd->chip_data) + if (irqd) { + clear_bit(irqd->hwirq, heart_irq_map); kfree(irqd->chip_data); + } } static const struct irq_domain_ops heart_domain_ops = { diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index e03e3c849f40..2f4f66a3bac0 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -173,23 +173,6 @@ struct compat_shmid64_ds { #define COMPAT_ELF_NGREG 80 typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. 
- */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static __inline__ void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = ¤t->thread.regs; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e2a412113359..f765385925e1 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -222,8 +222,7 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_FREE select HAVE_MMU_GATHER_PAGE_SIZE select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN @@ -238,6 +237,7 @@ config PPC select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE select NEED_SG_DMA_LENGTH select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 4e1d39847462..0b063830eea8 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -371,7 +371,7 @@ config PPC_PTDUMP config PPC_DEBUG_WX bool "Warn on W+X mappings at boot" - depends on PPC_PTDUMP + depends on PPC_PTDUMP && STRICT_KERNEL_RWX help Generate a warning if any W+X mappings are found at boot. diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index 134f12f89b92..2268396ff4bb 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -17,11 +17,11 @@ quiet_cmd_head_check = CHKHEAD $@ quiet_cmd_relocs_check = CHKREL $@ ifdef CONFIG_PPC_BOOK3S_64 cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \ + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@" else cmd_relocs_check = \ - $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" + $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif # `@true` prevents complaint when there is nothing to be done diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index 1699e9531552..00c4d843a023 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -228,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index f9dc597b0b86..91c8f1d9bcee 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -102,11 +102,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) isync(); /* Context sync required after mtsrin() */ } -static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr, end; - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + 
if (!(dir & KUAP_WRITE)) return; addr = (__force u32)to; @@ -119,11 +121,16 @@ static inline void allow_user_access(void __user *to, const void __user *from, u kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ } -static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +static __always_inline void prevent_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { u32 addr = (__force u32)to; u32 end = min(addr + size, TASK_SIZE); + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (!(dir & KUAP_WRITE)) + return; + if (!addr || addr >= TASK_SIZE || !size) return; @@ -131,12 +138,17 @@ static inline void prevent_user_access(void __user *to, const void __user *from, kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { + unsigned long begin = regs->kuap & 0xf0000000; + unsigned long end = regs->kuap << 28; + if (!is_write) return false; - return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); + return WARN(address < begin || address >= end, + "Bug: write fault blocked by segment registers !"); } #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 998317702630..dc5c039eb28e 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -49,7 +49,6 @@ static inline void pgtable_free(void *table, unsigned index_size) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { @@ -66,13 +65,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index f254de956d6a..c8d1076e0ebb 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -77,25 +77,27 @@ static inline void set_kuap(unsigned long value) isync(); } -static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { // This is written so we can resolve to a single case at build time - if (__builtin_constant_p(to) && to == NULL) + BUILD_BUG_ON(!__builtin_constant_p(dir)); + if (dir == KUAP_READ) set_kuap(AMR_KUAP_BLOCK_WRITE); - else if (__builtin_constant_p(from) && from == NULL) + else if (dir == KUAP_WRITE) set_kuap(AMR_KUAP_BLOCK_READ); else set_kuap(0); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && (regs->kuap & (is_write ? 
AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index f6968c811026..a41e91bd0580 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,9 +19,7 @@ extern struct vmemmap_backing *vmemmap_list; extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); -#ifdef CONFIG_SMP extern void __tlb_remove_table(void *_table); -#endif void pte_frag_destroy(void *pte_frag); static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 74d0db511099..3e3cdfaa76c6 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -96,23 +96,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current->thread.regs; diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index eea28ca679db..bc7d9d06a6d9 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,7 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,7 +62,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); return ret; } @@ -76,7 +76,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; - allow_write_to_user(uaddr, sizeof(*uaddr)); + allow_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -97,7 +98,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; - prevent_write_to_user(uaddr, sizeof(*uaddr)); + prevent_read_write_user(uaddr, uaddr, sizeof(*uaddr)); + return ret; } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 5b5e39643a27..94f24928916a 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -2,6 +2,10 @@ #ifndef _ASM_POWERPC_KUP_H_ #define _ASM_POWERPC_KUP_H_ +#define KUAP_READ 1 +#define KUAP_WRITE 2 +#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) + #ifdef CONFIG_PPC64 #include #endif @@ -42,32 +46,48 @@ void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) { } + unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned 
long size) { } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } + unsigned long size, unsigned long dir) { } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) { - allow_user_access(NULL, from, size); + allow_user_access(NULL, from, size, KUAP_READ); } static inline void allow_write_to_user(void __user *to, unsigned long size) { - allow_user_access(to, NULL, size); + allow_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void allow_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + allow_user_access(to, from, size, KUAP_READ_WRITE); } static inline void prevent_read_from_user(const void __user *from, unsigned long size) { - prevent_user_access(NULL, from, size); + prevent_user_access(NULL, from, size, KUAP_READ); } static inline void prevent_write_to_user(void __user *to, unsigned long size) { - prevent_user_access(to, NULL, size); + prevent_user_access(to, NULL, size, KUAP_WRITE); +} + +static inline void prevent_read_write_user(void __user *to, const void __user *from, + unsigned long size) +{ + prevent_user_access(to, from, size, KUAP_READ_WRITE); } #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_POWERPC_KUP_H_ */ +#endif /* _ASM_POWERPC_KUAP_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 1006a427e99c..1d70c80366fd 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -35,18 +35,19 @@ #include static inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_INIT); } static inline void prevent_user_access(void __user *to, const void __user *from, - unsigned long size) + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), "Bug: fault blocked by AP register !"); diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 332b13b4ecdb..29c43665a753 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -46,7 +46,6 @@ static inline void pgtable_free(void *table, int shift) #define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { unsigned long pgf = (unsigned long)table; @@ -64,13 +63,6 @@ static inline void __tlb_remove_table(void *_table) pgtable_free(table, shift); } -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 7f1fd41e3065..9b97c6091c5c 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -295,8 +295,13 @@ static inline bool pfn_valid(unsigned long pfn) /* * Some number of bits at the level of the page table that points to * a hugepte are used to encode the size. 
This masks those bits. + * On 8xx, HW assistance requires 4k alignment for the hugepte. */ +#ifdef CONFIG_PPC_8xx +#define HUGEPD_SHIFT_MASK 0xfff +#else #define HUGEPD_SHIFT_MASK 0x3f +#endif #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index b2c0be93929d..7f3a8b902325 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -26,6 +26,17 @@ #define tlb_flush tlb_flush extern void tlb_flush(struct mmu_gather *tlb); +/* + * book3s: + * Hash does not use the linux page-tables, so we can avoid + * the TLB invalidate for page-table freeing; Radix otoh does use the + * page-tables and needs the TLBI. + * + * nohash: + * We still do TLB invalidate in the __pte_free_tlb routine before we + * add the page table pages to mmu gather table batch. + */ +#define tlb_needs_table_invalidate() radix_enabled() /* Get the generic bits... */ #include diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c92fe7fe9692..cafad1960e76 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -313,9 +313,9 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) unsigned long ret; barrier_nospec(); - allow_user_access(to, from, n); + allow_read_write_user(to, from, n); ret = __copy_tofrom_user(to, from, n); - prevent_user_access(to, from, n); + prevent_read_write_user(to, from, n); return ret; } #endif /* __powerpc64__ */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index e745abc5457a..245be4fafe13 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2193,11 +2193,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Furthermore, let's ensure that the + * fix for the PMAO bug is enabled in compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3dd1a422fc29..7b048cee767c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -525,12 +525,6 @@ static void eeh_rmv_device(struct eeh_dev *edev, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully.
- */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_entry, &rmv_data->removed_vf_list); @@ -1190,6 +1184,17 @@ void eeh_handle_special_event(void) eeh_pe_state_mark(pe, EEH_PE_RECOVERING); eeh_handle_normal_event(pe); } else { + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) + edev->mode &= ~EEH_DEV_NO_HANDLER; + + /* Notify all devices to be down */ + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_pe_report( + "error_detected(permanent failure)", pe, + eeh_report_failure, NULL); + pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); @@ -1198,16 +1203,6 @@ void eeh_handle_special_event(void) (phb_pe->state & EEH_PE_RECOVERING)) continue; - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) - edev->mode &= ~EEH_DEV_NO_HANDLER; - - /* Notify all devices to be down */ - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_pe_report( - "error_detected(permanent failure)", pe, - eeh_report_failure, NULL); bus = eeh_pe_bus_get(phb_pe); if (!bus) { pr_err("%s: Cannot find PCI bus for " diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d60908ea37fb..13f699256258 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -179,7 +179,7 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ - kuap_save_and_lock r11, r12, r9, r2, r0 + kuap_save_and_lock r11, r12, r9, r2, r6 addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ @@ -284,6 +284,7 @@ reenable_mmu: rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ kuap_restore r11, r2, r3, r4, r5 + lwz r2, GPR2(r11) b fast_exception_return #endif @@ -777,7 +778,7 @@ fast_exception_return: 1: lis r3,exc_exit_restart_end@ha addi r3,r3,exc_exit_restart_end@l cmplw r12,r3 -#if CONFIG_PPC_BOOK3S_601 +#ifdef CONFIG_PPC_BOOK3S_601 bge 2b #else bge 3f @@ -785,7 +786,7 @@ fast_exception_return: lis r4,exc_exit_restart@ha addi r4,r4,exc_exit_restart@l cmplw r12,r4 -#if CONFIG_PPC_BOOK3S_601 +#ifdef CONFIG_PPC_BOOK3S_601 blt 2b #else blt 3f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 19f583e18402..98d8b6832fcb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -289,7 +289,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - rlwimi r10, r10, 0, 0x0f00 /* Clear bits 20-23 */ + rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 9524009ca1ae..d876eda92609 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -244,9 +244,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible for removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it.
+ */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index e6c30cee6abf..d215f9554553 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -200,14 +200,27 @@ unsigned long get_tm_stackpointer(struct task_struct *tsk) * normal/non-checkpointed stack pointer. */ + unsigned long ret = tsk->thread.regs->gpr[1]; + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM BUG_ON(tsk != current); if (MSR_TM_ACTIVE(tsk->thread.regs->msr)) { + preempt_disable(); tm_reclaim_current(TM_CAUSE_SIGNAL); if (MSR_TM_TRANSACTIONAL(tsk->thread.regs->msr)) - return tsk->thread.ckpt_regs.gpr[1]; + ret = tsk->thread.ckpt_regs.gpr[1]; + + /* + * If we treclaim, we must clear the current thread's TM bits + * before re-enabling preemption. Otherwise we might be + * preempted and have the live MSR[TS] changed behind our back + * (tm_recheckpoint_new_task() would recheckpoint). Besides, we + * enter the signal handler in non-transactional state. + */ + tsk->thread.regs->msr &= ~MSR_TS_MASK; + preempt_enable(); } #endif - return tsk->thread.regs->gpr[1]; + return ret; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 98600b276f76..1b090a76b444 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -489,19 +489,11 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, */ static int save_tm_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int sigret) + struct mcontext __user *tm_frame, int sigret, + unsigned long msr) { - unsigned long msr = regs->msr; - WARN_ON(tm_suspend_disabled); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */ - regs->msr &= ~MSR_TS_MASK; - /* Save both sets of general registers */ if (save_general_regs(¤t->thread.ckpt_regs, frame) || save_general_regs(regs, tm_frame)) @@ -912,6 +904,10 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -944,13 +940,13 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_frame = &rt_sf->uc_transact.uc_mcontext; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (__put_user((unsigned long)&rt_sf->uc_transact, &rt_sf->uc.uc_link) || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs)) goto badframe; - if (save_tm_user_regs(regs, frame, tm_frame, sigret)) + if (save_tm_user_regs(regs, frame, tm_frame, sigret, msr)) goto badframe; } else @@ -1369,6 +1365,10 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, int sigret; unsigned long tramp; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -1402,9 +1402,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset, #ifdef CONFIG_PPC_TRANSACTIONAL_MEM tm_mctx = &frame->mctx_transact; - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact, - sigret)) + sigret, msr)) goto badframe; } else diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 117515564ec7..84ed2e77ef9c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -192,7 +192,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, struct task_struct *tsk, - int signr, sigset_t *set, unsigned long handler) + int signr, sigset_t *set, unsigned long handler, + unsigned long msr) { /* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the * process never used altivec yet (MSR_VEC is zero in pt_regs of @@ -207,12 +208,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc); #endif struct pt_regs *regs = tsk->thread.regs; - unsigned long msr = tsk->thread.regs->msr; long err = 0; BUG_ON(tsk != current); - BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + BUG_ON(!MSR_TM_ACTIVE(msr)); WARN_ON(tm_suspend_disabled); @@ -222,13 +222,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, */ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX); - /* Remove TM bits from thread's MSR. The MSR in the sigcontext - * just indicates to userland that we were doing a transaction, but we - * don't want to return in transactional state. This also ensures - * that flush_fp_to_thread won't set TIF_RESTORE_TM again. 
- */ - regs->msr &= ~MSR_TS_MASK; - #ifdef CONFIG_ALTIVEC err |= __put_user(v_regs, &sc->v_regs); err |= __put_user(tm_v_regs, &tm_sc->v_regs); @@ -824,6 +817,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, unsigned long newsp = 0; long err = 0; struct pt_regs *regs = tsk->thread.regs; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* Save the thread's msr before get_tm_stackpointer() changes it */ + unsigned long msr = regs->msr; +#endif BUG_ON(tsk != current); @@ -841,7 +838,7 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= __put_user(0, &frame->uc.uc_flags); err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (MSR_TM_ACTIVE(regs->msr)) { + if (MSR_TM_ACTIVE(msr)) { /* The ucontext_t passed to userland points to the second * ucontext_t (for transactional state) with its uc_link ptr. */ @@ -849,7 +846,8 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext, &frame->uc_transact.uc_mcontext, tsk, ksig->sig, NULL, - (unsigned long)ksig->ka.sa.sa_handler); + (unsigned long)ksig->ka.sa.sa_handler, + msr); } else #endif { diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8834220036a5..857ab49750f1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -303,6 +303,12 @@ SECTIONS *(.branch_lt) } +#ifdef CONFIG_DEBUG_INFO_BTF + .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { + *(.BTF) + } +#endif + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { __start_opd = .; KEEP(*(.opd)) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6ff3f896d908..ef6aa63b071b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2368,7 +2368,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + goto uninit_vcpu; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2385,6 +2385,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu); out: diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 2de264fc3156..5914fbfa5e0a 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -543,7 +543,7 @@ kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start, ret = migrate_vma_setup(&mig); if (ret) - return ret; + goto out; spage = migrate_pfn_to_page(*mig.src); if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE)) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index ce4fcf76e53e..04512b4bb417 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1806,10 +1806,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; return vcpu; +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: @@ -2030,6 +2032,7 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm, { /* We should not get called */ BUG(); + return 0; } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d83adb1e1490..6ef0151ff70a 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -631,7 +631,7 @@ static int 
kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive, srcu_idx = srcu_read_lock(&kvm->srcu); gfn = gpa_to_gfn(kvm_eq.qaddr); - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); if (1ull << kvm_eq.qshift > page_size) { srcu_read_unlock(&kvm->srcu, srcu_idx); pr_warn("Incompatible host page size %lx!\n", page_size); diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 2e496eb86e94..1139bc56e004 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -73,7 +73,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 inst; - int ra, rs, rt; enum emulation_result emulated = EMULATE_FAIL; int advance = 1; struct instruction_op op; @@ -85,10 +84,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) if (emulated != EMULATE_DONE) return emulated; - ra = get_ra(inst); - rs = get_rs(inst); - rt = get_rt(inst); - vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 75483b40fcb1..2bf7e1b4fd82 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -378,7 +378,6 @@ static inline void pgtable_free(void *table, int index) } } -#ifdef CONFIG_SMP void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) { unsigned long pgf = (unsigned long)table; @@ -395,12 +394,6 @@ void __tlb_remove_table(void *_table) return pgtable_free(table, index); } -#else -void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) -{ - return pgtable_free(table, index); -} -#endif #ifdef CONFIG_PROC_FS atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b5047f9b5dec..e083a9f67f70 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault in a valid region (the exception table search passed // above), but blocked by KUAP is bad, it can never succeed. - if (bad_kuap_fault(regs, is_write)) + if (bad_kuap_fault(regs, address, is_write)) return true; // What's left? Kernel fault on user in well defined regions (extable @@ -354,6 +354,9 @@ static void sanity_check_fault(bool is_write, bool is_user, * Userspace trying to access kernel address, we get PROTFAULT for that. */ if (is_user && address >= TASK_SIZE) { + if ((long)address == -1) + return; + pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? 
(uid: %d)\n", current->comm, current->pid, address, from_kuid(&init_user_ns, current_uid())); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 73d4873fc7f8..33b3461d91e8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -53,20 +53,24 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (pshift >= pdshift) { cachep = PGT_CACHE(PTE_T_ORDER); num_hugepd = 1 << (pshift - pdshift); + new = NULL; } else if (IS_ENABLED(CONFIG_PPC_8xx)) { - cachep = PGT_CACHE(PTE_INDEX_SIZE); + cachep = NULL; num_hugepd = 1; + new = pte_alloc_one(mm); } else { cachep = PGT_CACHE(pdshift - pshift); num_hugepd = 1; + new = NULL; } - if (!cachep) { + if (!cachep && !new) { WARN_ONCE(1, "No page table cache created for hugetlb tables"); return -ENOMEM; } - new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); + if (cachep) + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -97,7 +101,10 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (i < num_hugepd) { for (i = i - 1 ; i >= 0; i--, hpdp--) *hpdp = __hugepd(0); - kmem_cache_free(cachep, new); + if (cachep) + kmem_cache_free(cachep, new); + else + pte_free(mm, new); } else { kmemleak_ignore(new); } @@ -324,8 +331,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else if (IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_free_tlb(tlb, hugepte, - get_hugepd_cache_index(PTE_INDEX_SIZE)); + pgtable_free_tlb(tlb, hugepte, 0); else pgtable_free_tlb(tlb, hugepte, get_hugepd_cache_index(pdshift - shift)); @@ -639,12 +645,13 @@ static int __init hugetlbpage_init(void) * if we have pdshift and shift value same, we don't * use pgt cache for hugepd. 
*/ - if (pdshift > shift && IS_ENABLED(CONFIG_PPC_8xx)) - pgtable_cache_add(PTE_INDEX_SIZE); - else if (pdshift > shift) - pgtable_cache_add(pdshift - shift); - else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) + if (pdshift > shift) { + if (!IS_ENABLED(CONFIG_PPC_8xx)) + pgtable_cache_add(pdshift - shift); + } else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || + IS_ENABLED(CONFIG_PPC_8xx)) { pgtable_cache_add(PTE_T_ORDER); + } configured = true; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f5535eae637f..ab81a727e273 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -369,7 +369,9 @@ static inline bool flush_coherent_icache(unsigned long addr) */ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { mb(); /* sync */ + allow_read_from_user((const void __user *)addr, L1_CACHE_BYTES); icbi((void *)addr); + prevent_read_from_user((const void __user *)addr, L1_CACHE_BYTES); mb(); /* sync */ isync(); return true; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 73b84166d06a..5fb90edd865e 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -218,6 +218,7 @@ void mark_rodata_ro(void) if (v_block_mapped((unsigned long)_sinittext)) { mmu_mark_rodata_ro(); + ptdump_check_wx(); return; } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2f9ddc29c535..c73205172447 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -173,10 +173,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) static void note_prot_wx(struct pg_state *st, unsigned long addr) { + pte_t pte = __pte(st->current_flags); + if (!IS_ENABLED(CONFIG_PPC_DEBUG_WX) || !st->check_wx) return; - if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + if (!pte_write(pte) || !pte_exec(pte)) return; WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index 43245f4a9bcb..6ffcb80cf844 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #define STACK_SP(STACK) *(STACK) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index f18d5067cd0f..fe489fc01c73 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -51,11 +51,6 @@ static struct task_struct *spusched_task; static struct timer_list spusched_timer; static struct timer_list spuloadavg_timer; -/* - * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). - */ -#define NORMAL_PRIO 120 - /* * Frequency of the spu scheduler tick. By default we do one SPU scheduler * tick for every 10 CPU scheduler ticks. 
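/*
 * Illustrative sketch, not part of the patch: the direction-aware
 * KUAP helpers introduced earlier in this series are always used as
 * a matched bracket around the low-level user access, with the
 * direction naming what is actually done to user memory:
 *
 *	allow_read_from_user(uaddr, size);
 *	ret = low_level_read(uaddr, size);
 *	prevent_read_from_user(uaddr, size);
 *
 * where low_level_read() is a hypothetical stand-in for the raw
 * access (__copy_tofrom_user(), a lwarx/stwcx. sequence, the icbi in
 * flush_coherent_icache(), ...). Mixed accesses such as the futex
 * and raw_copy_in_user() paths above use allow_read_write_user() /
 * prevent_read_write_user() so that KUAP_READ_WRITE opens both
 * directions at once.
 */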
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index da1068a9c263..67e4628dd527 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1558,6 +1558,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1570,13 +1574,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1590,6 +1592,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to its pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); #ifdef CONFIG_IOMMU_API iommu_register_group(&pe->table_group, @@ -2889,9 +2900,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) struct pci_dn *pdn; int mul, total_vfs; - if (!pdev->is_physfn || pci_dev_is_added(pdev)) - return; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; pdn->m64_single_mode = false; @@ -2966,6 +2974,30 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res->end = res->start - 1; } } + +static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev) +{ + if (WARN_ON(pci_dev_is_added(pdev))) + return; + + if (pdev->is_virtfn) { + struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev); + + /* + * VF PEs are single-device PEs so their pdev pointer needs to + * be set. The pdev doesn't exist when the PE is allocated (in + * pcibios_sriov_enable()) so we fix it up here. + */ + pe->pdev = pdev; + WARN_ON(!(pe->flags & PNV_IODA_PE_VF)); + } else if (pdev->is_physfn) { + /* + * For PFs adjust their allocated IOV resources to match what + * the PHB can support using its M64 BAR table.
+ */ + pnv_pci_ioda_fixup_iov_resources(pdev); + } +} #endif /* CONFIG_PCI_IOV */ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, @@ -3862,7 +3894,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, ppc_md.pcibios_default_alignment = pnv_pci_default_alignment; #ifdef CONFIG_PCI_IOV - ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources; + ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov; ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment; ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable; ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index c0bea75ac27b..8307e1f4086c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -814,24 +814,6 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) { struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; -#ifdef CONFIG_PCI_IOV - struct pnv_ioda_pe *pe; - struct pci_dn *pdn; - - /* Fix the VF pdn PE number */ - if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); - list_for_each_entry(pe, &phb->ioda.pe_list, list) { - if (pe->rid == ((pdev->bus->number << 8) | - (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; - pe->pdev = pdev; - break; - } - } - } -#endif /* CONFIG_PCI_IOV */ if (phb && phb->dma_dev_setup) phb->dma_dev_setup(phb, pdev); diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index c126b94d1943..a4d40a3ceea3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -360,8 +360,10 @@ static bool lmb_is_removable(struct drmem_lmb *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6ba081dd61c9..b4ce9d472dfe 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "pseries.h" @@ -133,10 +132,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, long npages, unsigned long uaddr, enum dma_data_direction direction, unsigned long attrs) @@ -147,25 +146,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == 
H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(tbl, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, (npages_start - (npages + 1))); break; } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); dump_stack(); @@ -194,7 +193,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, unsigned long flags; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages, uaddr, direction, attrs); } @@ -210,8 +210,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, + npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } @@ -262,16 +263,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); dump_stack(); } @@ -286,7 +287,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_MULTITCE)) - return tce_free_pSeriesLP(tbl, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); @@ -401,6 +402,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, u64 rc = 0; long l, limit; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + local_irq_disable(); /* to protect tcep and the page behind it */ tcep = __this_cpu_read(tce_page); @@ -1320,15 +1334,7 @@ void iommu_init_early_pSeries(void) of_reconfig_notifier_register(&iommu_reconfig_nb); register_memory_notifier(&iommu_mem_nb); - /* - * Secure guest memory is inacessible to devices so regular DMA isn't - * possible. - * - * In that case keep devices' dma_map_ops as NULL so that the generic - * DMA code path will use SWIOTLB to bounce buffers for DMA. 
- */ - if (!is_secure_guest()) - set_pci_dma_ops(&dma_iommu_ops); + set_pci_dma_ops(&dma_iommu_ops); } static int __init disable_multitce(char *str) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index e33e8bc4b69b..38c306551f76 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -435,10 +435,10 @@ static void maxmem_data(struct seq_file *m) { unsigned long maxmem = 0; - maxmem += drmem_info->n_lmbs * drmem_info->lmb_size; + maxmem += (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size; maxmem += hugetlb_total_pages() * PAGE_SIZE; - seq_printf(m, "MaxMem=%ld\n", maxmem); + seq_printf(m, "MaxMem=%lu\n", maxmem); } static int pseries_lparcfg_data(struct seq_file *m, void *v) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index c2ef320ba1bf..eb420655ed0b 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -322,6 +322,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) p->bus = nvdimm_bus_register(NULL, &p->bus_desc); if (!p->bus) { dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn); + kfree(p->bus_desc.provider_name); return -ENXIO; } @@ -477,6 +478,7 @@ static int papr_scm_remove(struct platform_device *pdev) nvdimm_bus_unregister(p->bus); drc_pmem_unbind(p); + kfree(p->bus_desc.provider_name); kfree(p); return 0; diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 79e2287991db..f682b7babc09 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1176,6 +1176,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; + kref_init(&tbl->it_kref); + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index 7b9fe0a567cf..014e00e74d2b 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -10,14 +10,21 @@ # based on relocs_check.pl # Copyright Β© 2009 IBM Corporation -if [ $# -lt 2 ]; then - echo "$0 [path to objdump] [path to vmlinux]" 1>&2 +if [ $# -lt 3 ]; then + echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2 exit 1 fi -# Have Kbuild supply the path to objdump so we handle cross compilation. +# Have Kbuild supply the path to objdump and nm so we handle cross compilation. objdump="$1" -vmlinux="$2" +nm="$2" +vmlinux="$3" + +# Remove from the bad relocations those that match an undefined weak symbol +# which will result in an absolute relocation to 0. 
+# Weak unresolved symbols are of that form in nm output: +# " w _binary__btf_vmlinux_bin_end" +undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }') bad_relocs=$( $objdump -R "$vmlinux" | @@ -26,8 +33,6 @@ $objdump -R "$vmlinux" | # These relocations are okay # On PPC64: # R_PPC64_RELATIVE, R_PPC64_NONE - # R_PPC64_ADDR64 mach_ - # R_PPC64_ADDR64 __crc_ # On PPC: # R_PPC_RELATIVE, R_PPC_ADDR16_HI, # R_PPC_ADDR16_HA,R_PPC_ADDR16_LO, @@ -39,8 +44,7 @@ R_PPC_ADDR16_HI R_PPC_ADDR16_HA R_PPC_RELATIVE R_PPC_NONE' | - grep -E -v '\a7; } -static inline void syscall_set_nr(struct task_struct *task, - struct pt_regs *regs, - int sysno) -{ - regs->a7 = sysno; -} - static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index e163b7b64c86..f6486d495601 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -228,20 +228,13 @@ check_syscall_nr: /* Check to make sure we don't jump to a bogus syscall number. */ li t0, __NR_syscalls la s0, sys_ni_syscall - /* - * The tracer can change syscall number to valid/invalid value. - * We use syscall_set_nr helper in syscall_trace_enter thus we - * cannot trust the current value in a7 and have to reload from - * the current task pt_regs. - */ - REG_L a7, PT_A7(sp) /* * Syscall number held in a7. * If syscall number is above allowed value, redirect to ni_syscall. */ bge a7, t0, 1f /* - * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. + * Check if syscall is rejected by tracer, i.e., a7 == -1. * If yes, we pretend it was executed. */ li t1, -1 @@ -334,6 +327,7 @@ work_resched: handle_syscall_trace_enter: move a0, sp call do_syscall_trace_enter + move t0, a0 REG_L a0, PT_A0(sp) REG_L a1, PT_A1(sp) REG_L a2, PT_A2(sp) @@ -342,6 +336,7 @@ handle_syscall_trace_enter: REG_L a5, PT_A5(sp) REG_L a6, PT_A6(sp) REG_L a7, PT_A7(sp) + bnez t0, ret_from_syscall_rejected j check_syscall_nr handle_syscall_trace_exit: move a0, sp diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index a4242be66966..e4d9baf97323 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -58,6 +58,12 @@ _start_kernel: /* Reset all registers except ra, a0, a1 */ call reset_regs + /* Setup a PMP to permit access to all of memory. */ + li a0, -1 + csrw CSR_PMPADDR0, a0 + li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) + csrw CSR_PMPCFG0, a0 + /* * The hartid in a0 is expected later on, and we have no firmware * to hand it to us. 
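/*
 * Illustrative note, not part of the patch (encodings follow the
 * RISC-V privileged spec): with every bit of pmpaddr0 set and NAPOT
 * addressing selected, PMP entry 0 matches the whole physical
 * address space, and the config byte works out to
 *
 *	PMP_A_NAPOT | PMP_R | PMP_W | PMP_X
 *	   (0x18    |  0x01 |  0x02 |  0x04  == 0x1f)
 *
 * assuming the usual asm/csr.h values. This matters because even
 * under an M-mode kernel user tasks run in U-mode, and on cores that
 * implement PMP a U-mode access that matches no entry is denied.
 */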
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index b7401858d872..8bbe5dbe1341 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -8,6 +8,10 @@ #include #include #include +#include +#include +#include +#include static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { @@ -386,3 +390,15 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +#define VMALLOC_MODULE_START \ + max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START) +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START, + VMALLOC_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 407464201b91..444dc7b0fd78 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -148,21 +148,19 @@ long arch_ptrace(struct task_struct *child, long request, * Allows PTRACE_SYSCALL to work. These are called from entry.S in * {handle,ret_from}_syscall. */ -__visible void do_syscall_trace_enter(struct pt_regs *regs) +__visible int do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) if (tracehook_report_syscall_entry(regs)) - syscall_set_nr(current, regs, -1); + return -1; /* * Do the secure computing after ptrace; failures should be fast. * If this fails we might have return value in a0 from seccomp * (via SECCOMP_RET_ERRNO/TRACE). */ - if (secure_computing() == -1) { - syscall_set_nr(current, regs, -1); - return; - } + if (secure_computing() == -1) + return -1; #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) @@ -170,6 +168,7 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs) #endif audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); + return 0; } __visible void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f4cad5163bf2..ffb3d94bf0cc 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -156,6 +156,6 @@ void __init trap_init(void) csr_write(CSR_SCRATCH, 0); /* Set the exception vector address */ csr_write(CSR_TVEC, &handle_exception); - /* Enable all interrupts */ - csr_write(CSR_IE, -1); + /* Enable interrupts */ + csr_write(CSR_IE, IE_SIE | IE_EIE); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 965a8cf4829c..fab855963c73 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -131,7 +131,7 @@ void __init setup_bootmem(void) for_each_memblock(memory, reg) { phys_addr_t end = reg->base + reg->size; - if (reg->base <= vmlinux_end && vmlinux_end <= end) { + if (reg->base <= vmlinux_start && vmlinux_end <= end) { mem_size = min(reg->size, (phys_addr_t)-PAGE_OFFSET); /* diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 7fbf56aab661..e2279fed8f56 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx) return false; } +static void mark_fp(struct rv_jit_context *ctx) +{ + __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags); +} + static void mark_call(struct rv_jit_context *ctx) { __set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags); @@ -596,7 +601,8 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx) emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); /* Set return value. 
*/ - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); + if (reg == RV_REG_RA) + emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx); } @@ -1426,6 +1432,10 @@ static void build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + if (bpf_stack_adjust) + mark_fp(ctx); + if (seen_reg(RV_REG_RA, ctx)) stack_adjust += 8; stack_adjust += 8; /* RV_REG_FP */ @@ -1443,7 +1453,6 @@ static void build_prologue(struct rv_jit_context *ctx) stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); - bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); stack_adjust += bpf_stack_adjust; store_offset = stack_adjust - 8; diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ba8556bb0fb1..8dfa2cf1f05c 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -69,7 +69,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include # cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls -ifeq ($(call cc-option-yn,-mpacked-stack),y) +ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y) cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK endif @@ -146,7 +146,7 @@ all: bzImage #KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg... KBUILD_IMAGE := $(boot)/bzImage -install: vmlinux +install: $(Q)$(MAKE) $(build)=$(boot) $@ bzImage: vmlinux diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e2c47d3a1c89..0ff9261c915e 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -70,7 +70,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE $(obj)/startup.a: $(OBJECTS) FORCE $(call if_changed,ar) -install: $(CONFIGURE) $(obj)/bzImage +install: sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index 4b86a8d3c121..dae10961d072 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -329,7 +329,7 @@ ENTRY(startup_kdump) .quad .Lduct # cr5: primary-aste origin .quad 0 # cr6: I/O interrupts .quad 0 # cr7: secondary space segment table - .quad 0 # cr8: access registers translation + .quad 0x0000000000008000 # cr8: access registers translation .quad 0 # cr9: tracing off .quad 0 # cr10: tracing off .quad 0 # cr11: tracing off diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c index 5d12352545c5..5591243d673e 100644 --- a/arch/s390/boot/kaslr.c +++ b/arch/s390/boot/kaslr.c @@ -75,7 +75,7 @@ static unsigned long get_random(unsigned long limit) *(unsigned long *) prng.parm_block ^= seed; for (i = 0; i < 16; i++) { cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, - (char *) entropy, (char *) entropy, + (u8 *) entropy, (u8 *) entropy, sizeof(entropy)); memcpy(prng.parm_block, entropy, sizeof(entropy)); } diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index ed007f4a6444..3f501159ee9f 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -15,7 +15,8 @@ void uv_query_info(void) if (!test_facility(158)) return; - if (uv_call(0, (uint64_t)&uvcb)) + /* rc==0x100 means that there is additional data we do not process */ + if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100) return; if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index a2b11ac00f60..7725f8006fdf 100644 --- a/arch/s390/include/asm/bug.h +++ 
b/arch/s390/include/asm/bug.h @@ -10,15 +10,14 @@ #define __EMIT_BUG(x) do { \ asm_inline volatile( \ - "0: j 0b+2\n" \ - "1:\n" \ + "0: mc 0,0\n" \ ".section .rodata.str,\"aMS\",@progbits,1\n" \ - "2: .asciz \""__FILE__"\"\n" \ + "1: .asciz \""__FILE__"\"\n" \ ".previous\n" \ ".section __bug_table,\"awM\",@progbits,%2\n" \ - "3: .long 1b-3b,2b-3b\n" \ + "2: .long 0b-2b,1b-2b\n" \ " .short %0,%1\n" \ - " .org 3b+%2\n" \ + " .org 2b+%2\n" \ ".previous\n" \ : : "i" (__LINE__), \ "i" (x), \ @@ -29,12 +28,11 @@ #define __EMIT_BUG(x) do { \ asm_inline volatile( \ - "0: j 0b+2\n" \ - "1:\n" \ + "0: mc 0,0\n" \ ".section __bug_table,\"awM\",@progbits,%1\n" \ - "2: .long 1b-2b\n" \ + "1: .long 0b-1b\n" \ " .short %0\n" \ - " .org 2b+%1\n" \ + " .org 1b+%1\n" \ ".previous\n" \ : : "i" (x), \ "i" (sizeof(struct bug_entry))); \ diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 63b46e30b2c3..9547cd5d6cdc 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -177,11 +177,7 @@ static inline void __user *compat_ptr(compat_uptr_t uptr) { return (void __user *)(unsigned long)(uptr & 0x7fffffffUL); } - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} +#define compat_ptr(uptr) compat_ptr(uptr) #ifdef CONFIG_COMPAT diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a4d38092530a..1019efd85b9d 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -33,6 +33,8 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #include #ifndef __ASSEMBLY__ @@ -40,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 3a06c264ea53..b05187ce5dbd 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -180,7 +180,7 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); -int clp_rescan_pci_devices_simple(void); +int clp_rescan_pci_devices_simple(u32 *fid); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7b03037a8475..9c578ad5409e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -750,6 +750,12 @@ static inline int pmd_write(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline int pmd_dirty(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h index 71e3f0146cda..7870cf834533 100644 --- a/arch/s390/include/asm/qdio.h +++ b/arch/s390/include/asm/qdio.h @@ -227,7 +227,7 @@ struct qdio_buffer { * @sbal: absolute SBAL address */ struct sl_element { - unsigned long sbal; + u64 sbal; } __attribute__ ((packed)); /** diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 670f14a228e5..6bf3a45ccfec 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -155,7 +155,7 @@ 
static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index b2956d49b6ad..1d3927e01a5f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -45,6 +45,7 @@ void specification_exception(struct pt_regs *regs); void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); void vector_exception(struct pt_regs *regs); +void monitor_event_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index c3597d2e2ae0..f942341429b1 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,6 +26,12 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -40,9 +46,16 @@ ENTRY(ftrace_caller) #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 59dee9d3bebf..eee3a482195a 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -81,7 +81,7 @@ PGM_CHECK_DEFAULT /* 3c */ PGM_CHECK_DEFAULT /* 3d */ PGM_CHECK_DEFAULT /* 3e */ PGM_CHECK_DEFAULT /* 3f */ -PGM_CHECK_DEFAULT /* 40 */ +PGM_CHECK(monitor_event_exception) /* 40 */ PGM_CHECK_DEFAULT /* 41 */ PGM_CHECK_DEFAULT /* 42 */ PGM_CHECK_DEFAULT /* 43 */ diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 164c0282b41a..dc75588d7894 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -53,11 +53,6 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (fixup) regs->psw.addr = extable_fixup(fixup); else { - enum bug_trap_type btt; - - btt = report_bug(regs->psw.addr, regs); - if (btt == BUG_TRAP_TYPE_WARN) - return; die(regs, str); } } @@ -245,6 +240,27 @@ void space_switch_exception(struct pt_regs *regs) do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } +void monitor_event_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + if (user_mode(regs)) + return; + + switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { + case BUG_TRAP_TYPE_NONE: + fixup = s390_search_extables(regs->psw.addr); + if (fixup) + regs->psw.addr = extable_fixup(fixup); + break; + case BUG_TRAP_TYPE_WARN: + break; + case BUG_TRAP_TYPE_BUG: + die(regs, "monitor event"); + break; + } 
+} + void kernel_stack_overflow(struct pt_regs *regs) { bust_spinlocks(1); @@ -255,8 +271,23 @@ void kernel_stack_overflow(struct pt_regs *regs) } NOKPROBE_SYMBOL(kernel_stack_overflow); +static void test_monitor_call(void) +{ + int val = 1; + + asm volatile( + " mc 0,0\n" + "0: xgr %0,%0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (val)); + if (!val) + panic("Monitor call doesn't work!\n"); +} + void __init trap_init(void) { sort_extable(__start_dma_ex_table, __stop_dma_ex_table); local_mcck_enable(); + test_monitor_call(); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 165dea4c7f19..c06c89d370a7 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2190,7 +2190,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2499,7 +2499,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2579,7 +2579,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d9e6bf3d54f0..876802894b35 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2860,9 +2860,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 | CR14_UNUSED_33 | CR14_EXTERNAL_DAMAGE_SUBMASK; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; + vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; @@ -4351,7 +4349,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, switch (ioctl) { case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index b0246c705a19..5674710a4841 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -2,7 +2,7 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007,2016 + * Copyright IBM Corp. 
2007,2020 * Author(s): Gerald Schaefer */ @@ -11,6 +11,9 @@ #include #include +#include +#include +#include /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -267,3 +270,98 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + goto check_asce_limit; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + goto check_asce_limit; + } + + if (mm->get_unmapped_area == arch_get_unmapped_area) + addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + addr = hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8e872951c07b..60716d18ce5a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -424,7 +424,7 @@ static void zpci_map_resources(struct pci_dev *pdev) if (zpci_use_mio(zdev)) pdev->resource[i].start = - (resource_size_t __force) zdev->bars[i].mio_wb; + (resource_size_t __force) zdev->bars[i].mio_wt; else pdev->resource[i].start = (resource_size_t __force) pci_iomap_range_fh(pdev, i, 0, 0); @@ -531,7 +531,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, flags |= IORESOURCE_MEM_64; if (zpci_use_mio(zdev)) - addr = (unsigned long) zdev->bars[i].mio_wb; + addr = 
(unsigned long) zdev->bars[i].mio_wt; else addr = ZPCI_ADDR(entry); size = 1UL << zdev->bars[i].size; @@ -939,5 +939,5 @@ subsys_initcall_sync(pci_base_init); void zpci_rescan(void) { if (zpci_is_enabled()) - clp_rescan_pci_devices_simple(); + clp_rescan_pci_devices_simple(NULL); } diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 4c613e569fe0..0d3d8f170ea4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -240,12 +240,14 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) } /* - * Enable/Disable a given PCI function defined by its function handle. + * Enable/Disable a given PCI function and update its function handle if + * necessary */ -static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) +static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; int rc, retries = 100; + u32 fid = zdev->fid; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) @@ -256,7 +258,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_SET_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = *fh; + rrb->request.fh = zdev->fh; rrb->request.oc = command; rrb->request.ndas = nr_dma_as; @@ -269,12 +271,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - *fh = rrb->response.fh; - else { + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Set PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); - rc = -EIO; + } + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { + zdev->fh = rrb->response.fh; + } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY && + rrb->response.fh == 0) { + /* Function is already in desired state - update handle */ + rc = clp_rescan_pci_devices_simple(&fid); } clp_free_block(rrb); return rc; @@ -282,18 +289,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) { - u32 fh = zdev->fh; int rc; - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN); - zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN); + zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc); if (rc) goto out; - zdev->fh = fh; if (zpci_use_mio(zdev)) { - rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); - zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); + rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO); + zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", + zdev->fid, zdev->fh, rc); if (rc) clp_disable_fh(zdev); } @@ -309,11 +315,8 @@ int clp_disable_fh(struct zpci_dev *zdev) if (!zdev_enabled(zdev)) return 0; - rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN); + rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN); zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc); - if (!rc) - zdev->fh = fh; - return rc; } @@ -370,10 +373,14 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) static void __clp_update(struct clp_fh_list_entry *entry, void *data) { struct zpci_dev *zdev; + u32 *fid = data; if (!entry->vendor_id) return; + if (fid && *fid != entry->fid) + return; + zdev = get_zdev_by_fid(entry->fid); if (!zdev) return; @@ -413,7 +420,10 @@ int clp_rescan_pci_devices(void) return rc; } -int clp_rescan_pci_devices_simple(void) +/* Rescan PCI functions and refresh function 
handles. If fid is non-NULL only + refresh the handle of the function matching @fid + */ +int clp_rescan_pci_devices_simple(u32 *fid) { struct clp_req_rsp_list_pci *rrb; int rc; @@ -422,7 +432,7 @@ if (!rrb) return -ENOMEM; - rc = clp_list_pci(rrb, NULL, __clp_update); + rc = clp_list_pci(rrb, fid, __clp_update); clp_free_block(rrb); return rc; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index a433ba01a317..215f17437a4f 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -13,6 +13,8 @@ #include #include +#include "../../../drivers/pci/pci.h" + #include #define zpci_attr(name, fmt, member) \ @@ -49,31 +51,50 @@ static DEVICE_ATTR_RO(mio_enabled); static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct kernfs_node *kn; struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); - int ret; - - if (!device_remove_file_self(dev, attr)) - return count; - + int ret = 0; + + /* Can't use device_remove_file_self() here as that would lead us to lock + * the pci_rescan_remove_lock while holding the device's kernfs lock. + * This would create a possible deadlock with disable_slot() which is + * not directly protected by the device's kernfs lock but takes it + * during the device removal which happens under + * pci_rescan_remove_lock. + * + * This is analogous to sdev_store_delete() in + * drivers/scsi/scsi_sysfs.c + */ + kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); + WARN_ON_ONCE(!kn); + /* device_remove_file() serializes concurrent calls ignoring all but + * the first + */ + device_remove_file(dev, attr); + + /* A concurrent call to recover_store() may slip between + * sysfs_break_active_protection() and the sysfs file removal. + * Once it unblocks from pci_lock_rescan_remove() the original pdev + * will already be removed. + */ pci_lock_rescan_remove(); - pci_stop_and_remove_bus_device(pdev); - ret = zpci_disable_device(zdev); - if (ret) - goto error; - - ret = zpci_enable_device(zdev); - if (ret) - goto error; - - pci_rescan_bus(zdev->bus); + if (pci_dev_is_added(pdev)) { + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + goto out; + + ret = zpci_enable_device(zdev); + if (ret) + goto out; + pci_rescan_bus(zdev->bus); + } +out: pci_unlock_rescan_remove(); - - return count; - -error: - pci_unlock_rescan_remove(); - return ret; + if (kn) + sysfs_unbreak_active_protection(kn); + return ret ?
ret : count; } static DEVICE_ATTR_WO(recover); diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d48818..b887cc402b71 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index eb24cb1afc11..18e9fb6fcf1b 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -65,7 +65,6 @@ config SPARC64 select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_RCU_TABLE_FREE if SMP - select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE select HAVE_MEMBLOCK_NODE_MAP select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 30b1763580b1..40a267b3bd52 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -125,23 +125,6 @@ typedef u32 compat_sigset_word; #define COMPAT_OFF_T_MAX 0x7fffffff -/* - * A pointer passed in from user mode. This should not - * be used for syscall parameters, just declare them - * as pointers because the syscall entry code will have - * appropriately converted them already. - */ - -static inline void __user *compat_ptr(compat_uptr_t uptr) -{ - return (void __user *)(unsigned long)uptr; -} - -static inline compat_uptr_t ptr_to_compat(void __user *uptr) -{ - return (u32)(unsigned long)uptr; -} - #ifdef CONFIG_COMPAT static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index a2f3fa61ee36..8cb8f3833239 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -28,6 +28,15 @@ void flush_tlb_pending(void); #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) #define tlb_flush(tlb) flush_tlb_pending() +/* + * SPARC64's hardware TLB fill does not use the Linux page-tables + * and therefore we don't need a TLBI when freeing page-table pages. 
+ */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE +#define tlb_needs_table_invalidate() (false) +#endif + #include #endif /* _SPARC64_TLB_H */ diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 5b933a598a33..0ea1240d2ea1 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -17,19 +17,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 7ec79918b566..f99e99e58075 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,12 +171,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 7145ce699982..eca6c452a41b 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -82,8 +82,8 @@ __preinit_array_end = .; } .init_array : { - /* dummy - we call this ourselves */ __init_array_start = .; + *(.init_array) __init_array_end = .; } .fini_array : { diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index c69d69ee96be..f5001481010c 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -103,6 +103,7 @@ SECTIONS be empty, which isn't pretty. */ . = ALIGN(32 / 8); .preinit_array : { *(.preinit_array) } + .init_array : { *(.init_array) } .fini_array : { *(.fini_array) } .data : { INIT_TASK_DATA(KERNEL_STACK_SIZE) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index af9c967782f6..bf07a8c0f495 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -123,6 +123,7 @@ config MPENTIUMM config MPENTIUM4 bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" depends on X86_32 + select X86_P6_NOP ---help--- Select this for Intel Pentium 4 chips. This includes the Pentium 4, Pentium D, P4-based Celeron and Xeon, and @@ -155,9 +156,8 @@ config MPENTIUM4 -Paxville -Dempsey - config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 ---help--- Select this for an AMD K6-family processor. Enables use of @@ -165,7 +165,7 @@ config MK6 flags to GCC. config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 ---help--- Select this for an AMD Athlon K7-family processor. Enables use of @@ -173,12 +173,90 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" ---help--- Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + ---help--- + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. 
+ Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + ---help--- + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + ---help--- + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + ---help--- + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + ---help--- + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + ---help--- + Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + ---help--- + Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + ---help--- + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + ---help--- + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + ---help--- + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 + bool "AMD Zen 2" + ---help--- + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -260,6 +338,7 @@ config MVIAC7 config MPSC bool "Intel P4 / older Netburst based Xeon" + select X86_P6_NOP depends on X86_64 ---help--- Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey @@ -269,8 +348,19 @@ config MPSC using the cpu family field in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. +config MATOM + bool "Intel Atom" + select X86_P6_NOP + ---help--- + + Select this for the Intel Atom platform. Intel Atom CPUs have an + in-order pipelining architecture and thus can benefit from + accordingly optimized code. Use a recent GCC with specific Atom + support in order to fully benefit from selecting this option. + config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" + select X86_P6_NOP ---help--- Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -278,14 +368,133 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) -config MATOM - bool "Intel Atom" + Enables -march=core2 + +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP ---help--- - Select this for the Intel Atom platform. Intel Atom CPUs have an - in-order pipelining architecture and thus can benefit from - accordingly optimized code. Use a recent GCC with specific Atom - support in order to fully benefit from selecting this option. + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + ---help--- + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + ---help--- + + Select this for the Intel Silvermont platform. 
+ + Enables -march=silvermont + +config MGOLDMONT + bool "Intel Goldmont" + select X86_P6_NOP + ---help--- + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. + + Enables -march=goldmont + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" + select X86_P6_NOP + ---help--- + + Select this for the Intel Goldmont Plus platform including Gemini Lake. + + Enables -march=goldmont-plus + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + ---help--- + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + ---help--- + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + ---help--- + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + ---help--- + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + ---help--- + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + ---help--- + + Select this for 6th Gen Core processors in the Skylake X family. + + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + ---help--- + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + ---help--- + + Select this for 10th Gen Core processors in the Ice Lake family. + + Enables -march=icelake-client + +config MCASCADELAKE + bool "Intel Cascade Lake" + select X86_P6_NOP + ---help--- + + Select this for Xeon processors in the Cascade Lake family. + + Enables -march=cascadelake config GENERIC_CPU bool "Generic-x86-64" @@ -294,6 +503,19 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config MNATIVE + bool "Native optimizations autodetected by GCC" + ---help--- + + GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. -march=native + also detects and applies additional settings beyond -march specific + to your CPU, (eg. -msse4). Unless you have a specific reason not to + (e.g. distcc cross-compiling), you should probably be using + -march=native rather than anything listed below. 
+ + Enables -march=native + endchoice config X86_GENERIC @@ -318,7 +540,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MJAGUAR || MPENTIUMM || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MNATIVE || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,35 +558,36 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK8SSE3 || MK7 || MEFFICEON || MCORE2 || MK10 || MBARCELONA || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MNATIVE config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MATOM || MNATIVE config X86_USE_3DNOW def_bool y depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML -# -# P6_NOPs are a relatively minor optimization that require a family >= -# 6 processor, except that it is broken on certain VIA chips. -# Furthermore, AMD chips prefer a totally different sequence of NOPs -# (which work on all CPUs). In addition, it looks like Virtual PC -# does not understand them. -# -# As a result, disallow these if we're not compiling for X86_64 (these -# NOPs do work on all x86-64 capable chips); the list of processors in -# the right-hand clause are the cores that benefit from this optimization. -# config X86_P6_NOP - def_bool y - depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + default n + bool "Support for P6_NOPs on Intel chips" + depends on (MCORE2 || MPENTIUM4 || MPSC || MATOM || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MNATIVE) + ---help--- + P6_NOPs are a relatively minor optimization that require a family >= + 6 processor, except that it is broken on certain VIA chips. + Furthermore, AMD chips prefer a totally different sequence of NOPs + (which work on all CPUs). In addition, it looks like Virtual PC + does not understand them. 
+ + As a result, disallow these if we're not compiling for X86_64 (these + NOPs do work on all x86-64 capable chips); the list of processors in + the right-hand clause are the cores that benefit from this optimization. + + Say Y if you have Intel CPU newer than Pentium Pro, N otherwise. config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK8SSE3 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MNATIVE || MATOM) || X86_64 config X86_CMPXCHG64 def_bool y @@ -374,7 +597,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MJAGUAR || MK7 || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 94df0868804b..dcbed7e3a070 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -119,13 +119,53 @@ else KBUILD_CFLAGS += $(call cc-option,-mskip-rax-setup) # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) + cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) + cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-mtune=k8) + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) + cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) + cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3) + cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4) + cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1) + cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) cflags-$(CONFIG_MCORE2) += \ - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + $(call cc-option,-march=core2,$(call cc-option,-mtune=core2)) + cflags-$(CONFIG_MNEHALEM) += \ + $(call cc-option,-march=nehalem,$(call cc-option,-mtune=nehalem)) + cflags-$(CONFIG_MWESTMERE) += \ + $(call cc-option,-march=westmere,$(call cc-option,-mtune=westmere)) + cflags-$(CONFIG_MSILVERMONT) += \ + 
$(call cc-option,-march=silvermont,$(call cc-option,-mtune=silvermont)) + cflags-$(CONFIG_MGOLDMONT) += \ + $(call cc-option,-march=goldmont,$(call cc-option,-mtune=goldmont)) + cflags-$(CONFIG_MGOLDMONTPLUS) += \ + $(call cc-option,-march=goldmont-plus,$(call cc-option,-mtune=goldmont-plus)) + cflags-$(CONFIG_MSANDYBRIDGE) += \ + $(call cc-option,-march=sandybridge,$(call cc-option,-mtune=sandybridge)) + cflags-$(CONFIG_MIVYBRIDGE) += \ + $(call cc-option,-march=ivybridge,$(call cc-option,-mtune=ivybridge)) + cflags-$(CONFIG_MHASWELL) += \ + $(call cc-option,-march=haswell,$(call cc-option,-mtune=haswell)) + cflags-$(CONFIG_MBROADWELL) += \ + $(call cc-option,-march=broadwell,$(call cc-option,-mtune=broadwell)) + cflags-$(CONFIG_MSKYLAKE) += \ + $(call cc-option,-march=skylake,$(call cc-option,-mtune=skylake)) + cflags-$(CONFIG_MSKYLAKEX) += \ + $(call cc-option,-march=skylake-avx512,$(call cc-option,-mtune=skylake-avx512)) + cflags-$(CONFIG_MCANNONLAKE) += \ + $(call cc-option,-march=cannonlake,$(call cc-option,-mtune=cannonlake)) + cflags-$(CONFIG_MICELAKE) += \ + $(call cc-option,-march=icelake-client,$(call cc-option,-mtune=icelake-client)) + cflags-$(CONFIG_MCASCADELAKE) += \ + $(call cc-option,-march=cascadelake,$(call cc-option,-mtune=cascadelake)) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) \ + $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index cd3056759880..2c81838df533 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -24,7 +24,19 @@ cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. 
cflags-$(CONFIG_MK7) += -march=athlon +cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon) +cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3,-march=athlon) +cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon) +cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon) +cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon) +cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon) +cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon) +cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon) +cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3,-march=athlon) +cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4,-march=athlon) +cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1,-march=athlon) +cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2,-march=athlon) cflags-$(CONFIG_MCRUSOE) += -march=i686 -falign-functions=0 -falign-jumps=0 -falign-loops=0 cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) -falign-functions=0 -falign-jumps=0 -falign-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) @@ -33,8 +45,22 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) -falign-fu cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) cflags-$(CONFIG_MVIAC7) += -march=i686 cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) -cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) +cflags-$(CONFIG_MNEHALEM) += -march=i686 $(call tune,nehalem) +cflags-$(CONFIG_MWESTMERE) += -march=i686 $(call tune,westmere) +cflags-$(CONFIG_MSILVERMONT) += -march=i686 $(call tune,silvermont) +cflags-$(CONFIG_MGOLDMONT) += -march=i686 $(call tune,goldmont) +cflags-$(CONFIG_MGOLDMONTPLUS) += -march=i686 $(call tune,goldmont-plus) +cflags-$(CONFIG_MSANDYBRIDGE) += -march=i686 $(call tune,sandybridge) +cflags-$(CONFIG_MIVYBRIDGE) += -march=i686 $(call tune,ivybridge) +cflags-$(CONFIG_MHASWELL) += -march=i686 $(call tune,haswell) +cflags-$(CONFIG_MBROADWELL) += -march=i686 $(call tune,broadwell) +cflags-$(CONFIG_MSKYLAKE) += -march=i686 $(call tune,skylake) +cflags-$(CONFIG_MSKYLAKEX) += -march=i686 $(call tune,skylake-avx512) +cflags-$(CONFIG_MCANNONLAKE) += -march=i686 $(call tune,cannonlake) +cflags-$(CONFIG_MICELAKE) += -march=i686 $(call tune,icelake-client) +cflags-$(CONFIG_MCASCADELAKE) += -march=i686 $(call tune,cascadelake) +cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell,$(call cc-option,-march=core2,-march=i686)) \ + $(call cc-option,-mtune=bonnell,$(call cc-option,-mtune=generic)) # AMD Elan support cflags-$(CONFIG_MELAN) += -march=i486 diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 95410d6ee2ff..748b6d28a91d 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -88,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [a-zA-Z] 
\(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 25019d42ae93..ef2ad7253cd5 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -393,7 +393,13 @@ int count_immovable_mem_regions(void) table = table_addr + sizeof(struct acpi_table_srat); while (table + sizeof(struct acpi_subtable_header) < table_end) { + sub_table = (struct acpi_subtable_header *)table; + if (!sub_table->length) { + debug_putstr("Invalid zero length SRAT subtable.\n"); + return 0; + } + if (sub_table->type == ACPI_SRAT_TYPE_MEMORY_AFFINITY) { struct acpi_srat_mem_affinity *ma; diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 748456c365f4..9557c5a15b91 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -29,9 +29,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 240626e7f55a..43842fade8fa 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index a7752cd78b89..3253797fa8a1 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -246,6 +246,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, @@ -301,6 +302,25 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } +/* + * AMD64 events are detected based on their event codes. + */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + +static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) +{ + if (!(x86_pmu.flags & PMU_FL_PAIR)) + return false; + + switch (amd_get_event_code(hwc)) { + case 0x003: return true; /* Retired SSE/AVX FLOPs */ + default: return false; + } +} + static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -319,14 +339,6 @@ static int amd_core_hw_config(struct perf_event *event) return 0; } -/* - * AMD64 events are detected based on their event codes. 
- */ -static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) -{ - return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); -} - static inline int amd_is_nb_event(struct hw_perf_event *hwc) { return (hwc->config & 0xe0) == 0xe0; @@ -855,6 +867,20 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, } } +static struct event_constraint pair_constraint; + +static struct event_constraint * +amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (amd_is_pair_event_code(hwc)) + return &pair_constraint; + + return &unconstrained; +} + static ssize_t amd_event_sysfs_show(char *page, u64 config) { u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | @@ -898,33 +924,15 @@ static __initconst const struct x86_pmu amd_pmu = { static int __init amd_core_pmu_init(void) { + u64 even_ctr_mask = 0ULL; + int i; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; - /* Avoid calulating the value each time in the NMI handler */ + /* Avoid calculating the value each time in the NMI handler */ perf_nmi_window = msecs_to_jiffies(100); - switch (boot_cpu_data.x86) { - case 0x15: - pr_cont("Fam15h "); - x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; - break; - case 0x17: - pr_cont("Fam17h "); - /* - * In family 17h, there are no event constraints in the PMC hardware. - * We fallback to using default amd_get_event_constraints. - */ - break; - case 0x18: - pr_cont("Fam18h "); - /* Using default amd_get_event_constraints. */ - break; - default: - pr_err("core perfctr but no constraints; unknown hardware!\n"); - return -ENODEV; - } - /* * If core performance counter extensions exists, we must use * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also @@ -939,6 +947,30 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.amd_nb_constraints = 0; + if (boot_cpu_data.x86 == 0x15) { + pr_cont("Fam15h "); + x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; + } + if (boot_cpu_data.x86 >= 0x17) { + pr_cont("Fam17h+ "); + /* + * Family 17h and compatibles have constraints for Large + * Increment per Cycle events: they may only be assigned an + * even numbered counter that has a consecutive adjacent odd + * numbered counter following it. + */ + for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + even_ctr_mask |= 1 << i; + + pair_constraint = (struct event_constraint) + __EVENT_CONSTRAINT(0, even_ctr_mask, 0, + x86_pmu.num_counters / 2, 0, + PERF_X86_EVENT_PAIR); + + x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; + x86_pmu.flags |= PMU_FL_PAIR; + } + pr_cont("core perfctr, "); return 0; } diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a6ea07f2aa84..4d867a752f0e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. 
So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3be51aa06e67..dff6623804c2 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4765,6 +4765,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_TREMONT_D: + case INTEL_FAM6_ATOM_TREMONT: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index e1daf4151e11..4814c964692c 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,17 +40,18 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL + * Available model: SLM,AMT,GLM,CNL,TNT * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, - * CNL,KBL,CML + * CNL,KBL,CML,TNT * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, + * TNT * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -60,17 +61,18 @@ * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,TGL + * KBL,CML,ICL,TGL,TNT * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, - * GLM,CNL,KBL,CML,ICL,TGL + * GLM,CNL,KBL,CML,ICL,TGL,TNT * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL, + * TNT * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -87,7 +89,8 @@ * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3be51aa06e67..dff6623804c2 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4765,6 +4765,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case INTEL_FAM6_ATOM_TREMONT_D:
+	case INTEL_FAM6_ATOM_TREMONT:
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e1daf4151e11..4814c964692c 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,17 +40,18 @@
  * Model specific counters:
  *	MSR_CORE_C1_RES: CORE C1 Residency Counter
  *			 perf code: 0x00
- *			 Available model: SLM,AMT,GLM,CNL
+ *			 Available model: SLM,AMT,GLM,CNL,TNT
  *			 Scope: Core (each processor core has a MSR)
  *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
  *			       perf code: 0x01
  *			       Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
- *						CNL,KBL,CML
+ *						CNL,KBL,CML,TNT
  *			       Scope: Core
  *	MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *			       perf code: 0x02
  *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
- *						SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
+ *						SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
+ *						TNT
  *			       Scope: Core
  *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *			       perf code: 0x03
@@ -60,17 +61,18 @@
  *	MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
  *			      perf code: 0x00
  *			      Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
- *					       KBL,CML,ICL,TGL
+ *					       KBL,CML,ICL,TGL,TNT
  *			      Scope: Package (physical package)
  *	MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
  *			      perf code: 0x01
  *			      Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
- *					       GLM,CNL,KBL,CML,ICL,TGL
+ *					       GLM,CNL,KBL,CML,ICL,TGL,TNT
  *			      Scope: Package (physical package)
  *	MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
  *			      perf code: 0x02
- *			      Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- *					       SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
+ *			      Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
+ *					       SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL,
+ *					       TNT
  *			      Scope: Package (physical package)
  *	MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
  *			      perf code: 0x03
@@ -87,7 +89,8 @@
  *			      Scope: Package (physical package)
  *	MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
  *			       perf code: 0x06
- *			       Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
+ *			       Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
+ *						TNT
  *			       Scope: Package (physical package)
  *
  */
@@ -640,8 +643,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT,   glm_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_D, glm_cstates),
-
 	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT_D, glm_cstates),
+	X86_CSTATES_MODEL(INTEL_FAM6_ATOM_TREMONT, glm_cstates),
 
 	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
 	X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE,   icl_cstates),
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ce83950036c5..e5ad97a82342 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1713,6 +1713,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
 	old = ((s64)(prev_raw_count << shift) >> shift);
 	local64_add(new - old + count * period, &event->count);
 
+	local64_set(&hwc->period_left, -new);
+
 	perf_event_update_userpage(event);
 
 	return 0;
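The ds.c hunk above builds on the PEBS auto-reload update math. The shift idiom in its context lines sign-extends a narrow raw counter so the delta arithmetic survives wraparound; a standalone sketch, assuming a 48-bit counter width:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Assume 48-bit counters, as on recent Intel PMUs. */
	int cntval_bits = 48;
	int shift = 64 - cntval_bits;

	/* A raw counter value that has wrapped past the reload point. */
	uint64_t prev_raw_count = 0xFFFFFFFFF000ULL;

	/*
	 * Mirror the driver's idiom: shifting left then arithmetic
	 * right sign-extends the 48-bit value into 64 bits, so the
	 * "new - old" delta works across counter overflow.
	 */
	int64_t old = ((int64_t)(prev_raw_count << shift)) >> shift;

	printf("raw = %#llx\n", (unsigned long long)prev_raw_count);
	printf("old = %lld (sign-extended)\n", (long long)old);
	return 0;
}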
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 6f86650b3f77..a949f6f55991 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -75,8 +75,9 @@ static bool test_intel(int idx, void *data)
 
 	case INTEL_FAM6_ATOM_GOLDMONT:
 	case INTEL_FAM6_ATOM_GOLDMONT_D:
-
 	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+	case INTEL_FAM6_ATOM_TREMONT_D:
+	case INTEL_FAM6_ATOM_TREMONT:
 
 	case INTEL_FAM6_XEON_PHI_KNL:
 	case INTEL_FAM6_XEON_PHI_KNM:
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 930611db8f9a..e2fd363de649 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -77,6 +77,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 #define PERF_X86_EVENT_AUTO_RELOAD	0x0200 /* use PEBS auto-reload */
 #define PERF_X86_EVENT_LARGE_PEBS	0x0400 /* use large PEBS */
 #define PERF_X86_EVENT_PEBS_VIA_PT	0x0800 /* use PT buffer for PEBS */
+#define PERF_X86_EVENT_PAIR		0x1000 /* Large Increment per Cycle */
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -743,6 +744,7 @@ do {									\
 #define PMU_FL_EXCL_ENABLED	0x8 /* exclusive counter active */
 #define PMU_FL_PEBS_ALL		0x10 /* all events are valid PEBS events */
 #define PMU_FL_TFA		0x20 /* deal with TSX force abort */
+#define PMU_FL_PAIR		0x40 /* merge counters for large incr. events */
 
 #define EVENT_VAR(_id) event_attr_##_id
 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 2ebc17d9c72c..19e94af9cc5d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -140,6 +140,7 @@ extern void apic_soft_disable(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
+extern void apic_intr_mode_select(void);
 extern void apic_intr_mode_init(void);
 extern void init_apic_mappings(void);
 void register_lapic_address(unsigned long address);
@@ -188,6 +189,7 @@ static inline void disable_local_APIC(void) { }
 # define setup_secondary_APIC_clock x86_init_noop
 static inline void lapic_update_tsc_freq(void) { }
 static inline void init_bsp_APIC(void) { }
+static inline void apic_intr_mode_select(void) { }
 static inline void apic_intr_mode_init(void) { }
 static inline void lapic_assign_system_vectors(void) { }
 static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
@@ -452,6 +454,14 @@ static inline void ack_APIC_irq(void)
 	apic_eoi();
 }
 
+
+static inline bool lapic_vector_set_in_irr(unsigned int vector)
+{
+	u32 irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+
+	return !!(irr & (1U << (vector % 32)));
+}
+
 static inline unsigned default_get_apic_id(unsigned long x)
 {
 	unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
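The new lapic_vector_set_in_irr() helper indexes one of eight 32-bit IRR registers spaced 0x10 apart. A standalone sketch of just the offset arithmetic (the example vector is arbitrary):

#include <stdio.h>

int main(void)
{
	/* The IRR is 256 bits spread over eight 32-bit registers, 0x10 apart. */
	unsigned int vector = 0xec;   /* arbitrary example vector */

	unsigned int reg_offset = vector / 32 * 0x10; /* which IRR register */
	unsigned int bit        = vector % 32;        /* bit inside it */

	printf("APIC_IRR + %#x, bit %u\n", reg_offset, bit);
	/* The helper above then tests: irr & (1U << bit). */
	return 0;
}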
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 22c4dfe65992..52e9f3480f69 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -177,23 +177,6 @@ typedef struct user_regs_struct compat_elf_gregset_t;
 	(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
 #endif
 
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
-	return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
-
 static inline void __user *arch_compat_alloc_user_space(long len)
 {
 	compat_uptr_t sp;
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
index 02c6ef8f7667..07344d82e88e 100644
--- a/arch/x86/include/asm/io_bitmap.h
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -19,7 +19,14 @@ struct task_struct;
 
 void io_bitmap_share(struct task_struct *tsk);
 void io_bitmap_exit(void);
 
-void tss_update_io_bitmap(void);
+void native_tss_update_io_bitmap(void);
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include
+#else
+#define tss_update_io_bitmap native_tss_update_io_bitmap
+#endif
+
 #else
 static inline void io_bitmap_share(struct task_struct *tsk) { }
 static inline void io_bitmap_exit(void) { }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b79cd6aa4075..7425c83fd343 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -378,12 +378,12 @@ struct kvm_mmu {
 	void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root);
 	unsigned long (*get_cr3)(struct kvm_vcpu *vcpu);
 	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
-	int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err,
+	int (*page_fault)(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 err,
 			  bool prefault);
 	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
 				  struct x86_exception *fault);
-	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
-			    struct x86_exception *exception);
+	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
+			    u32 access, struct x86_exception *exception);
 	gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			       struct x86_exception *exception);
 	int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -685,10 +685,10 @@ struct kvm_vcpu_arch {
 	bool pvclock_set_guest_stopped_request;
 
 	struct {
+		u8 preempted;
 		u64 msr_val;
 		u64 last_steal;
-		struct gfn_to_hva_cache stime;
-		struct kvm_steal_time steal;
+		struct gfn_to_pfn_cache cache;
 	} st;
 
 	u64 tsc_offset;
@@ -1115,7 +1115,7 @@ struct kvm_x86_ops {
 	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
 	void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
 	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
-	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
+	int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
@@ -1145,6 +1145,7 @@ struct kvm_x86_ops {
 	bool (*xsaves_supported)(void);
 	bool (*umip_emulated)(void);
 	bool (*pt_supported)(void);
+	bool (*pku_supported)(void);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 	void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
@@ -1468,7 +1469,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
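For reference, the compat_ptr()/ptr_to_compat() pair removed above is plain pointer-width conversion; in this series the definition presumably moves to common code rather than disappearing outright. A userspace sketch of the same cast chain:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t compat_uptr_t;

/* Same cast chain as the helper removed from the header above. */
static void *compat_ptr(compat_uptr_t uptr)
{
	return (void *)(unsigned long)uptr;
}

int main(void)
{
	compat_uptr_t up = 0xdeadbeef;

	/*
	 * Zero-extension, never sign-extension: a 32-bit user pointer
	 * must land in the low 4 GiB of the 64-bit address space.
	 */
	printf("compat_ptr(%#x) = %p\n", (unsigned int)up, compat_ptr(up));
	return 0;
}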
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index c215d2762488..a4fddfe3d4fb 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -27,6 +27,36 @@ struct mod_arch_specific {
 #define MODULE_PROC_FAMILY "586MMX "
 #elif defined CONFIG_MCORE2
 #define MODULE_PROC_FAMILY "CORE2 "
+#elif defined CONFIG_MNATIVE
+#define MODULE_PROC_FAMILY "NATIVE "
+#elif defined CONFIG_MNEHALEM
+#define MODULE_PROC_FAMILY "NEHALEM "
+#elif defined CONFIG_MWESTMERE
+#define MODULE_PROC_FAMILY "WESTMERE "
+#elif defined CONFIG_MSILVERMONT
+#define MODULE_PROC_FAMILY "SILVERMONT "
+#elif defined CONFIG_MGOLDMONT
+#define MODULE_PROC_FAMILY "GOLDMONT "
+#elif defined CONFIG_MGOLDMONTPLUS
+#define MODULE_PROC_FAMILY "GOLDMONTPLUS "
+#elif defined CONFIG_MSANDYBRIDGE
+#define MODULE_PROC_FAMILY "SANDYBRIDGE "
+#elif defined CONFIG_MIVYBRIDGE
+#define MODULE_PROC_FAMILY "IVYBRIDGE "
+#elif defined CONFIG_MHASWELL
+#define MODULE_PROC_FAMILY "HASWELL "
+#elif defined CONFIG_MBROADWELL
+#define MODULE_PROC_FAMILY "BROADWELL "
+#elif defined CONFIG_MSKYLAKE
+#define MODULE_PROC_FAMILY "SKYLAKE "
+#elif defined CONFIG_MSKYLAKEX
+#define MODULE_PROC_FAMILY "SKYLAKEX "
+#elif defined CONFIG_MCANNONLAKE
+#define MODULE_PROC_FAMILY "CANNONLAKE "
+#elif defined CONFIG_MICELAKE
+#define MODULE_PROC_FAMILY "ICELAKE "
+#elif defined CONFIG_MCASCADELAKE
+#define MODULE_PROC_FAMILY "CASCADELAKE "
 #elif defined CONFIG_MATOM
 #define MODULE_PROC_FAMILY "ATOM "
 #elif defined CONFIG_M686
@@ -45,6 +75,28 @@ struct mod_arch_specific {
 #define MODULE_PROC_FAMILY "K7 "
 #elif defined CONFIG_MK8
 #define MODULE_PROC_FAMILY "K8 "
+#elif defined CONFIG_MK8SSE3
+#define MODULE_PROC_FAMILY "K8SSE3 "
+#elif defined CONFIG_MK10
+#define MODULE_PROC_FAMILY "K10 "
+#elif defined CONFIG_MBARCELONA
+#define MODULE_PROC_FAMILY "BARCELONA "
+#elif defined CONFIG_MBOBCAT
+#define MODULE_PROC_FAMILY "BOBCAT "
+#elif defined CONFIG_MBULLDOZER
+#define MODULE_PROC_FAMILY "BULLDOZER "
+#elif defined CONFIG_MPILEDRIVER
+#define MODULE_PROC_FAMILY "PILEDRIVER "
+#elif defined CONFIG_MSTEAMROLLER
+#define MODULE_PROC_FAMILY "STEAMROLLER "
+#elif defined CONFIG_MJAGUAR
+#define MODULE_PROC_FAMILY "JAGUAR "
+#elif defined CONFIG_MEXCAVATOR
+#define MODULE_PROC_FAMILY "EXCAVATOR "
+#elif defined CONFIG_MZEN
+#define MODULE_PROC_FAMILY "ZEN "
+#elif defined CONFIG_MZEN2
+#define MODULE_PROC_FAMILY "ZEN2 "
 #elif defined CONFIG_MELAN
 #define MODULE_PROC_FAMILY "ELAN "
 #elif defined CONFIG_MCRUSOE
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 084e98da04a7..717660f82f8f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -512,6 +512,8 @@
 #define MSR_K7_HWCR			0xc0010015
 #define MSR_K7_HWCR_SMMLOCK_BIT		0
 #define MSR_K7_HWCR_SMMLOCK		BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_HWCR_IRPERF_EN_BIT	30
+#define MSR_K7_HWCR_IRPERF_EN		BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
 #define MSR_K7_FID_VID_CTL		0xc0010041
 #define MSR_K7_FID_VID_STATUS		0xc0010042
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 75ded1d13d98..9d5d949e662e 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -41,7 +41,6 @@ struct nmiaction {
 	struct list_head	list;
 	nmi_handler_t		handler;
 	u64			max_duration;
-	struct irq_work		irq_work;
 	unsigned long		flags;
 	const char		*name;
 };
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 86e7317eb31f..694d8daf4983 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -295,6 +295,13 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
 	PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
 }
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_update_io_bitmap(void)
+{
+	PVOP_VCALL0(cpu.update_io_bitmap);
+}
+#endif
+
 static inline void paravirt_activate_mm(struct mm_struct *prev,
 					struct mm_struct *next)
 {
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 84812964d3dd..732f62e04ddb 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,6 +140,10 @@ struct pv_cpu_ops {
 
 	void (*load_sp0)(unsigned long sp0);
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+	void (*update_io_bitmap)(void);
+#endif
+
 	void (*wbinvd)(void);
 
 	/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 19435858df5f..96d9cd208610 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -51,12 +51,14 @@ struct x86_init_resources {
  * are set up.
  * @intr_init:			interrupt init code
  * @trap_init:			platform specific trap setup
 * @intr_mode_select:		interrupt delivery mode selection
  * @intr_mode_init:		interrupt delivery mode setup
  */
 struct x86_init_irqs {
 	void (*pre_vector_init)(void);
 	void (*intr_init)(void);
 	void (*trap_init)(void);
+	void (*intr_mode_select)(void);
 	void (*intr_mode_init)(void);
 };
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9ec463fe96f2..2f1e2333bd0a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
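The paravirt plumbing above is a function-pointer indirection: pv_ops carries a slot that defaults to the native implementation and can be repointed by a hypervisor guest. A self-contained model of that pattern (all names invented; this is not the kernel API):

#include <stdio.h>

/* A toy pv_ops-style table: one slot per overridable operation. */
struct cpu_ops {
	void (*update_io_bitmap)(void);
};

static void native_update_io_bitmap(void)
{
	printf("native implementation runs\n");
}

static void guest_update_io_bitmap(void)
{
	printf("hypervisor-specific implementation runs\n");
}

/* Default to the native function, as the patch does for pv_ops. */
static struct cpu_ops ops = { .update_io_bitmap = native_update_io_bitmap };

int main(void)
{
	ops.update_io_bitmap();                     /* bare metal */
	ops.update_io_bitmap = guest_update_io_bitmap;
	ops.update_io_bitmap();                     /* after a guest patches the slot */
	return 0;
}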
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 28446fa6bf18..4b0f9117e1cd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -830,8 +830,17 @@ bool __init apic_needs_pit(void)
 	if (!tsc_khz || !cpu_khz)
 		return true;
 
-	/* Is there an APIC at all? */
-	if (!boot_cpu_has(X86_FEATURE_APIC))
+	/* Is there an APIC at all or is it disabled? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic)
+		return true;
+
+	/*
+	 * If interrupt delivery mode is legacy PIC or virtual wire without
+	 * configuration, the local APIC timer won't be set up. Make sure
+	 * that the PIT is initialized.
+	 */
+	if (apic_intr_mode == APIC_PIC ||
+	    apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG)
 		return true;
 
 	/* Virt guests may lack ARAT, but still have DEADLINE */
@@ -1322,7 +1331,7 @@ void __init sync_Arb_IDs(void)
 
 enum apic_intr_mode_id apic_intr_mode __ro_after_init;
 
-static int __init apic_intr_mode_select(void)
+static int __init __apic_intr_mode_select(void)
 {
 	/* Check kernel option */
 	if (disable_apic) {
@@ -1384,6 +1393,12 @@ static int __init apic_intr_mode_select(void)
 	return APIC_SYMMETRIC_IO;
 }
 
+/* Select the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_select(void)
+{
+	apic_intr_mode = __apic_intr_mode_select();
+}
+
 /*
  * An initial setup of the virtual wire mode.
  */
@@ -1440,8 +1455,6 @@ void __init apic_intr_mode_init(void)
 {
 	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
 
-	apic_intr_mode = apic_intr_mode_select();
-
 	switch (apic_intr_mode) {
 	case APIC_PIC:
 		pr_info("APIC: Keep in PIC mode(8259)\n");
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7f7533462474..159bd0cb8548 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -23,10 +23,8 @@
 
 static struct irq_domain *msi_default_domain;
 
-static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+static void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg)
 {
-	struct irq_cfg *cfg = irqd_cfg(data);
-
 	msg->address_hi = MSI_ADDR_BASE_HI;
 
 	if (x2apic_enabled())
@@ -47,6 +45,127 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 		MSI_DATA_VECTOR(cfg->vector);
 }
 
+static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	__irq_msi_compose_msg(irqd_cfg(data), msg);
+}
+
+static void irq_msi_update_msg(struct irq_data *irqd, struct irq_cfg *cfg)
+{
+	struct msi_msg msg[2] = { [1] = { }, };
+
+	__irq_msi_compose_msg(cfg, msg);
+	irq_data_get_irq_chip(irqd)->irq_write_msi_msg(irqd, msg);
+}
+
+static int
+msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg old_cfg, *cfg = irqd_cfg(irqd);
+	struct irq_data *parent = irqd->parent_data;
+	unsigned int cpu;
+	int ret;
+
+	/* Save the current configuration */
+	cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
+	old_cfg = *cfg;
+
+	/* Allocate a new target vector */
+	ret = parent->chip->irq_set_affinity(parent, mask, force);
+	if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+		return ret;
+
+	/*
+	 * For non-maskable and non-remapped MSI interrupts the migration
+	 * to a different destination CPU and a different vector has to be
+	 * done carefully to handle the possible stray interrupt which can
+	 * be caused by the non-atomic update of the address/data pair.
+	 *
+	 * Direct update is possible when:
+	 * - The MSI is maskable (remapped MSI does not use this code path).
+	 *   The quirk bit is not set in this case.
+	 * - The new vector is the same as the old vector
+	 * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up)
+	 * - The new destination CPU is the same as the old destination CPU
+	 */
+	if (!irqd_msi_nomask_quirk(irqd) ||
+	    cfg->vector == old_cfg.vector ||
+	    old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR ||
+	    cfg->dest_apicid == old_cfg.dest_apicid) {
+		irq_msi_update_msg(irqd, cfg);
+		return ret;
+	}
+
+	/*
+	 * Paranoia: Validate that the interrupt target is the local
+	 * CPU.
+	 */
+	if (WARN_ON_ONCE(cpu != smp_processor_id())) {
+		irq_msi_update_msg(irqd, cfg);
+		return ret;
+	}
+
+	/*
+	 * Redirect the interrupt to the new vector on the current CPU
+	 * first. This might cause a spurious interrupt on this vector if
+	 * the device raises an interrupt right between this update and the
+	 * update to the final destination CPU.
+	 *
+	 * If the vector is in use then the installed device handler will
+	 * denote it as spurious which is no harm as this is a rare event
+	 * and interrupt handlers have to cope with spurious interrupts
+	 * anyway. If the vector is unused, then it is marked so it won't
+	 * trigger the 'No irq handler for vector' warning in do_IRQ().
+	 *
+	 * This requires to hold vector lock to prevent concurrent updates to
+	 * the affected vector.
+	 */
+	lock_vector_lock();
+
+	/*
+	 * Mark the new target vector on the local CPU if it is currently
+	 * unused. Reuse the VECTOR_RETRIGGERED state which is also used in
+	 * the CPU hotplug path for a similar purpose. This cannot be
+	 * undone here as the current CPU has interrupts disabled and
+	 * cannot handle the interrupt before the whole set_affinity()
+	 * section is done. In the CPU unplug case, the current CPU is
+	 * about to vanish and will not handle any interrupts anymore. The
+	 * vector is cleaned up when the CPU comes online again.
+	 */
+	if (IS_ERR_OR_NULL(this_cpu_read(vector_irq[cfg->vector])))
+		this_cpu_write(vector_irq[cfg->vector], VECTOR_RETRIGGERED);
+
+	/* Redirect it to the new vector on the local CPU temporarily */
+	old_cfg.vector = cfg->vector;
+	irq_msi_update_msg(irqd, &old_cfg);
+
+	/* Now transition it to the target CPU */
+	irq_msi_update_msg(irqd, cfg);
+
+	/*
+	 * All interrupts after this point are now targeted at the new
+	 * vector/CPU.
+	 *
+	 * Drop vector lock before testing whether the temporary assignment
+	 * to the local CPU was hit by an interrupt raised in the device,
+	 * because the retrigger function acquires vector lock again.
+	 */
+	unlock_vector_lock();
+
+	/*
+	 * Check whether the transition raced with a device interrupt and
+	 * is pending in the local APICs IRR. It is safe to do this outside
+	 * of vector lock as the irq_desc::lock of this interrupt is still
+	 * held and interrupts are disabled: The check is not accessing the
+	 * underlying vector store. It's just checking the local APIC's
+	 * IRR.
+	 */
+	if (lapic_vector_set_in_irr(cfg->vector))
+		irq_data_get_irq_chip(irqd)->irq_retrigger(irqd);
+
+	return ret;
+}
+
 /*
  * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
  * which implement the MSI or MSI-X Capability Structure.
@@ -58,6 +177,7 @@ static struct irq_chip pci_msi_controller = {
 	.irq_ack		= irq_chip_ack_parent,
 	.irq_retrigger		= irq_chip_retrigger_hierarchy,
 	.irq_compose_msi_msg	= irq_msi_compose_msg,
+	.irq_set_affinity	= msi_set_affinity,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -146,6 +266,8 @@ void __init arch_init_msi_domain(struct irq_domain *parent)
 	}
 	if (!msi_default_domain)
 		pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
+	else
+		msi_default_domain->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK;
 }
 
 #ifdef CONFIG_IRQ_REMAP
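Why the elaborate two-step migration in msi_set_affinity() above: a classic (non-remapped) x86 MSI programs the destination APIC in the address dword and the vector in the data dword, and the two cannot be updated atomically. A standalone sketch of the architectural packing, with arbitrary example values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/*
	 * Classic x86 MSI layout, as composed above: the address selects
	 * the destination APIC, the data carries the vector.
	 */
	uint32_t dest_apicid = 3;      /* arbitrary example */
	uint32_t vector = 0x31;        /* arbitrary example */

	uint32_t address_lo = 0xfee00000 | (dest_apicid << 12);
	uint32_t data = vector;

	printf("MSI address = %#x, data = %#x\n", address_lo, data);
	/*
	 * Because address and data are separate dwords, changing both at
	 * once is not atomic -- hence the vector-first, CPU-second dance
	 * in msi_set_affinity() above.
	 */
	return 0;
}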
"non-const" as open function is set at runtime) */ -static struct file_operations proc_version_fops = { - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_oemid_fops = { - .open = proc_oemid_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static __init void uv_setup_proc_files(int hubless) { struct proc_dir_entry *pde; - char *name = hubless ? "hubless" : "hubbed"; pde = proc_mkdir(UV_PROC_NODE, NULL); - proc_create("oemid", 0, pde, &proc_oemid_fops); - proc_create(name, 0, pde, &proc_version_fops); + proc_create_single("oemid", 0, pde, proc_oemid_show); if (hubless) - proc_version_fops.open = proc_hubless_open; + proc_create_single("hubless", 0, pde, proc_hubless_show); else - proc_version_fops.open = proc_hubbed_open; + proc_create_single("hubbed", 0, pde, proc_hubbed_show); } /* Initialize UV hubless systems */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 62c30279be77..c3f4dd4ae155 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -28,6 +28,7 @@ static const int amd_erratum_383[]; static const int amd_erratum_400[]; +static const int amd_erratum_1054[]; static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum); /* @@ -978,6 +979,15 @@ static void init_amd(struct cpuinfo_x86 *c) /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + /* + * Turn on the Instructions Retired free counter on machines not + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (cpu_has(c, X86_FEATURE_IRPERF) && + !cpu_has_amd_erratum(c, amd_erratum_1054)) + msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); } #ifdef CONFIG_X86_32 @@ -1105,6 +1115,10 @@ static const int amd_erratum_400[] = static const int amd_erratum_383[] = AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf)); +/* #1054: Instructions Retired Performance Counter May Be Inaccurate */ +static const int amd_erratum_1054[] = + AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2e4d90294fe6..9761e9c56756 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -462,7 +462,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". 
*/ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index d6cf5c18a7e0..f031c651dd32 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1161,9 +1161,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1196,8 +1199,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1241,16 +1245,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1258,7 +1258,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1343,8 +1343,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1355,9 +1353,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); @@ -1366,8 +1368,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1377,13 +1383,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c 
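The mce/amd.c rework above moves freeing into a kobject ->release() callback so an object cannot be freed while sysfs still holds a reference. A stripped-down userspace model of that refcount-plus-release pattern (not the kernel's kobject API):

#include <stdio.h>
#include <stdlib.h>

/* A toy kobject-style refcounted object. */
struct kobj {
	int refcount;
	void (*release)(struct kobj *k);
};

static void kobj_put(struct kobj *k)
{
	if (--k->refcount == 0)
		k->release(k);   /* free only when the last ref drops */
}

struct threshold_block_model {
	struct kobj kobj;        /* first member, so the cast below is valid */
	int data;
};

static void block_release(struct kobj *k)
{
	/* The kernel uses container_of(); here kobj is the first member. */
	printf("releasing block\n");
	free((struct threshold_block_model *)k);
}

int main(void)
{
	struct threshold_block_model *b = malloc(sizeof(*b));

	b->kobj = (struct kobj){ .refcount = 2, .release = block_release };
	kobj_put(&b->kobj);  /* still referenced elsewhere: no free */
	kobj_put(&b->kobj);  /* last reference: release runs */
	return 0;
}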
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index e270d0770134..c0e0726878e7 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -492,17 +492,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
 			return;
 
 		if ((val & 3UL) == 1UL) {
-			/* PPIN available but disabled: */
+			/* PPIN locked in disabled mode */
 			return;
 		}
 
-		/* If PPIN is disabled, but not locked, try to enable: */
-		if (!(val & 3UL)) {
+		/* If PPIN is disabled, try to enable */
+		if (!(val & 2UL)) {
 			wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
 			rdmsrl_safe(MSR_PPIN_CTL, &val);
 		}
 
-		if ((val & 3UL) == 2UL)
+		/* Is the enable bit set? */
+		if (val & 2UL)
 			set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
 	}
 }
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index 6c3e1c92f183..f36dc0742085 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp)
 	*temp = (msr_val >> 16) & 0x7F;
 }
 
-static void throttle_active_work(struct work_struct *work)
+static void __maybe_unused throttle_active_work(struct work_struct *work)
 {
 	struct _thermal_state *state = container_of(to_delayed_work(work),
 						struct _thermal_state, therm_work);
@@ -486,9 +486,14 @@ static int thermal_throttle_offline(unsigned int cpu)
 {
 	struct thermal_state *state = &per_cpu(thermal_state, cpu);
 	struct device *dev = get_cpu_device(cpu);
+	u32 l;
+
+	/* Mask the thermal vector before draining possibly pending work */
+	l = apic_read(APIC_LVTTHMR);
+	apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
 
-	cancel_delayed_work(&state->package_throttle.therm_work);
-	cancel_delayed_work(&state->core_throttle.therm_work);
+	cancel_delayed_work_sync(&state->package_throttle.therm_work);
+	cancel_delayed_work_sync(&state->core_throttle.therm_work);
 
 	state->package_throttle.rate_control_active = false;
 	state->core_throttle.rate_control_active = false;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index e49b77283924..181c992f448c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -57,6 +57,7 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
 }
 
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
 
 /**
  * struct mon_evt - Entry in the event list of a resource
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 397206f23d14..773124b0e18a 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -514,7 +514,7 @@ void mbm_handle_overflow(struct work_struct *work)
 
 	mutex_lock(&rdtgroup_mutex);
 
-	if (!static_branch_likely(&rdt_enable_key))
+	if (!static_branch_likely(&rdt_mon_enable_key))
 		goto out_unlock;
 
 	d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]);
@@ -543,7 +543,7 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 	unsigned long delay = msecs_to_jiffies(delay_ms);
 	int cpu;
 
-	if (!static_branch_likely(&rdt_enable_key))
+	if (!static_branch_likely(&rdt_mon_enable_key))
 		return;
 	cpu = cpumask_any(&dom->cpu_mask);
 	dom->mbm_work_cpu = cpu;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index dac7209a0708..954fd048ad9b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1970,7 +1970,7 @@ static int rdt_get_tree(struct fs_context *fc)
 
 	if (rdt_mon_capable) {
 		ret = mongroup_create_dir(rdtgroup_default.kn,
-					  NULL, "mon_groups",
+					  &rdtgroup_default, "mon_groups",
 					  &kn_mongrp);
 		if (ret < 0)
 			goto out_info;
@@ -2205,7 +2205,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
 	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
 		free_rmid(sentry->mon.rmid);
 		list_del(&sentry->mon.crdtgrp_list);
-		kfree(sentry);
+
+		if (atomic_read(&sentry->waitcount) != 0)
+			sentry->flags = RDT_DELETED;
+		else
+			kfree(sentry);
 	}
 }
 
@@ -2243,7 +2247,11 @@ static void rmdir_all_sub(void)
 
 		kernfs_remove(rdtgrp->kn);
 		list_del(&rdtgrp->rdtgroup_list);
-		kfree(rdtgrp);
+
+		if (atomic_read(&rdtgrp->waitcount) != 0)
+			rdtgrp->flags = RDT_DELETED;
+		else
+			kfree(rdtgrp);
 	}
 	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
 	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2446,7 +2454,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
 	/*
	 * Create the mon_data directory first.
	 */
-	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
+	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
 	if (ret)
 		return ret;
 
@@ -2645,7 +2653,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	uint files = 0;
 	int ret;
 
-	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
 	if (!prdtgrp) {
 		ret = -ENODEV;
 		goto out_unlock;
@@ -2718,7 +2726,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 	kernfs_activate(kn);
 
 	/*
-	 * The caller unlocks the prgrp_kn upon success.
+	 * The caller unlocks the parent_kn upon success.
	 */
 	return 0;
 
@@ -2729,7 +2737,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
 out_free_rgrp:
 	kfree(rdtgrp);
 out_unlock:
-	rdtgroup_kn_unlock(prgrp_kn);
+	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
 
@@ -2767,7 +2775,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
	 */
 	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
 
-	rdtgroup_kn_unlock(prgrp_kn);
+	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
 
@@ -2810,7 +2818,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
	 * Create an empty mon_groups directory to hold the subset
	 * of tasks and cpus to monitor.
	 */
-	ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
+	ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
 	if (ret) {
 		rdt_last_cmd_puts("kernfs subdir error\n");
 		goto out_del_list;
@@ -2826,7 +2834,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
 out_common_fail:
 	mkdir_rdt_prepare_clean(rdtgrp);
 out_unlock:
-	rdtgroup_kn_unlock(prgrp_kn);
+	rdtgroup_kn_unlock(parent_kn);
 	return ret;
 }
 
@@ -2952,13 +2960,13 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
 	closid_free(rdtgrp->closid);
 	free_rmid(rdtgrp->mon.rmid);
 
+	rdtgroup_ctrl_remove(kn, rdtgrp);
+
 	/*
	 * Free all the child monitor group rmids.
	 */
 	free_all_child_rdtgrp(rdtgrp);
 
-	rdtgroup_ctrl_remove(kn, rdtgrp);
-
 	return 0;
 }
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index 3e20d322bc98..032509adf9de 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -115,11 +115,12 @@ void __init tsx_init(void)
 		tsx_disable();
 
 		/*
-		 * tsx_disable() will change the state of the
-		 * RTM CPUID bit. Clear it here since it is now
-		 * expected to be not set.
+		 * tsx_disable() will change the state of the RTM and HLE
+		 * CPUID bits. Clear them here since they are now expected
+		 * not to be set.
		 */
 		setup_clear_cpu_cap(X86_FEATURE_RTM);
+		setup_clear_cpu_cap(X86_FEATURE_HLE);
 	} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
 
 		/*
@@ -131,10 +132,10 @@ void __init tsx_init(void)
 		tsx_enable();
 
 		/*
-		 * tsx_enable() will change the state of the
-		 * RTM CPUID bit. Force it here since it is now
-		 * expected to be set.
+		 * tsx_enable() will change the state of the RTM and HLE CPUID
+		 * bits. Force them here since they are now expected to be set.
		 */
 		setup_force_cpu_cap(X86_FEATURE_RTM);
+		setup_force_cpu_cap(X86_FEATURE_HLE);
 	}
 }
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 0071b794ed19..400a05e1c1c5 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -352,6 +352,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			fpregs_unlock();
 			return 0;
 		}
+		fpregs_deactivate(fpu);
 		fpregs_unlock();
 	}
 
@@ -403,6 +404,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	}
 	if (!ret)
 		fpregs_mark_activate();
+	else
+		fpregs_deactivate(fpu);
 	fpregs_unlock();
 
 err_out:
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 4d4f5d9faac3..23054909c8dd 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -10,8 +10,6 @@ extern struct boot_params boot_params;
 
 static enum efi_secureboot_mode get_sb_mode(void)
 {
-	efi_char16_t efi_SecureBoot_name[] = L"SecureBoot";
-	efi_char16_t efi_SetupMode_name[] = L"SecureBoot";
 	efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID;
 	efi_status_t status;
 	unsigned long size;
@@ -25,7 +23,7 @@ static enum efi_secureboot_mode get_sb_mode(void)
 	}
 
 	/* Get variable contents into buffer */
-	status = efi.get_variable(efi_SecureBoot_name, &efi_variable_guid,
+	status = efi.get_variable(L"SecureBoot", &efi_variable_guid,
 				  NULL, &size, &secboot);
 	if (status == EFI_NOT_FOUND) {
 		pr_info("ima: secureboot mode disabled\n");
@@ -38,7 +36,7 @@ static enum efi_secureboot_mode get_sb_mode(void)
 	}
 
 	size = sizeof(setupmode);
-	status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid,
+	status = efi.get_variable(L"SetupMode", &efi_variable_guid,
 				  NULL, &size, &setupmode);
 
 	if (status != EFI_SUCCESS)	/* ignore unknown SetupMode */
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index e676a9916c49..54c21d6abd5a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void)
 }
 fs_initcall(nmi_warning_debugfs);
 
-static void nmi_max_handler(struct irq_work *w)
+static void nmi_check_duration(struct nmiaction *action, u64 duration)
 {
-	struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
+	u64 whole_msecs = READ_ONCE(action->max_duration);
 	int remainder_ns, decimal_msecs;
-	u64 whole_msecs = READ_ONCE(a->max_duration);
+
+	if (duration < nmi_longest_ns || duration < action->max_duration)
+		return;
+
+	action->max_duration = duration;
 
 	remainder_ns = do_div(whole_msecs, (1000 * 1000));
 	decimal_msecs = remainder_ns / 1000;
 
 	printk_ratelimited(KERN_INFO
 		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
-		a->handler, whole_msecs, decimal_msecs);
+		action->handler, whole_msecs, decimal_msecs);
 }
 
 static int nmi_handle(unsigned int type, struct pt_regs *regs)
@@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs)
 		delta = sched_clock() - delta;
 		trace_nmi_handler(a->handler, (int)delta, thishandled);
 
-		if (delta < nmi_longest_ns || delta < a->max_duration)
-			continue;
-
-		a->max_duration = delta;
-		irq_work_queue(&a->irq_work);
+		nmi_check_duration(a, delta);
 	}
 	rcu_read_unlock();
 
@@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action)
 	if (!action->handler)
 		return -EINVAL;
 
-	init_irq_work(&action->irq_work, nmi_max_handler);
-
 	raw_spin_lock_irqsave(&desc->lock, flags);
 
 	/*
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 789f5e4f89de..c131ba4e70ef 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 
 /*
  * nop stub, which must not clobber anything *including the stack* to
@@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = {
 	.cpu.iret		= native_iret,
 	.cpu.swapgs		= native_swapgs,
 
+#ifdef CONFIG_X86_IOPL_IOPERM
+	.cpu.update_io_bitmap	= native_tss_update_io_bitmap,
+#endif
+
 	.cpu.start_context_switch	= paravirt_nop,
 	.cpu.end_context_switch		= paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 61e93a318983..3363e71589dd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
 /**
  * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
  */
-void tss_update_io_bitmap(void)
+void native_tss_update_io_bitmap(void)
 {
 	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
 	struct thread_struct *t = &current->thread;
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
index 01f0e2263b86..298fc1edd9c9 100644
--- a/arch/x86/kernel/sysfb_simplefb.c
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -90,11 +90,11 @@ __init int create_simplefb(const struct screen_info *si,
 	if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
 		size <<= 16;
 	length = mode->height * mode->stride;
-	length = PAGE_ALIGN(length);
 	if (length > size) {
 		printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
 		return -EINVAL;
 	}
+	length = PAGE_ALIGN(length);
 
 	/* setup IORESOURCE_MEM as framebuffer memory */
 	memset(&res, 0, sizeof(res));
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 7ce29cee9f9e..d8673d8a779b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -91,10 +91,18 @@ void __init hpet_time_init(void)
 
 static __init void x86_late_time_init(void)
 {
+	/*
+	 * Before PIT/HPET init, select the interrupt mode. This is required
+	 * to decide correctly whether the PIT should be initialized.
+	 */
+	x86_init.irqs.intr_mode_select();
+
+	/* Setup the legacy timers */
 	x86_init.timers.timer_init();
+
 	/*
-	 * After PIT/HPET timers init, select and setup
-	 * the final interrupt mode for delivering IRQs.
+	 * After PIT/HPET timers init, set up the final interrupt mode for
+	 * delivering IRQs.
	 */
 	x86_init.irqs.intr_mode_init();
 	tsc_init();
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ce89430a7f80..9a89261dcd2a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -80,6 +80,7 @@ struct x86_init_ops x86_init __initdata = {
 		.pre_vector_init	= init_ISA_irqs,
 		.intr_init		= native_init_IRQ,
 		.trap_init		= x86_init_noop,
+		.intr_mode_select	= apic_intr_mode_select,
 		.intr_mode_init		= apic_intr_mode_init
 	},
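The nmi_check_duration() helper above splits a nanosecond duration into whole milliseconds plus three decimals, which do_div() performs in the kernel. The same arithmetic in standalone C, with an arbitrary sample duration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* An example handler duration in nanoseconds. */
	uint64_t duration_ns = 12345678;

	/* Mirror the do_div() split: whole ms plus three decimals. */
	uint64_t whole_msecs = duration_ns;
	unsigned int remainder_ns = (unsigned int)(whole_msecs % (1000 * 1000));

	whole_msecs /= 1000 * 1000;
	printf("took %llu.%03u msecs\n",
	       (unsigned long long)whole_msecs, remainder_ns / 1000);
	return 0;   /* prints "took 12.345 msecs" */
}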
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index cf55629ff0ff..50678fe6a504 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -352,6 +352,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 	unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
 	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
 	unsigned f_la57;
+	unsigned f_pku = kvm_x86_ops->pku_supported() ? F(PKU) : 0;
 
 	/* cpuid 7.0.ebx */
 	const u32 kvm_cpuid_7_0_ebx_x86_features =
@@ -363,7 +364,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 
 	/* cpuid 7.0.ecx*/
 	const u32 kvm_cpuid_7_0_ecx_x86_features =
-		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
+		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
 		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
@@ -392,6 +393,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 		/* Set LA57 based on hardware capability. */
 		entry->ecx |= f_la57;
 		entry->ecx |= f_umip;
+		entry->ecx |= f_pku;
 		/* PKU is not yet implemented for shadow paging. */
 		if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
 			entry->ecx &= ~F(PKU);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 952d1a4f4d7e..6023d7917494 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,6 +22,7 @@
 #include "kvm_cache_regs.h"
 #include
 #include
+#include
 #include
 #include
 
@@ -1075,8 +1076,23 @@ static void fetch_register_operand(struct operand *op)
 	}
 }
 
+static void emulator_get_fpu(void)
+{
+	fpregs_lock();
+
+	fpregs_assert_state_consistent();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+}
+
+static void emulator_put_fpu(void)
+{
+	fpregs_unlock();
+}
+
 static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 {
+	emulator_get_fpu();
 	switch (reg) {
 	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
 	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
@@ -1098,11 +1114,13 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 #endif
 	default: BUG();
 	}
+	emulator_put_fpu();
 }
 
 static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 			  int reg)
 {
+	emulator_get_fpu();
 	switch (reg) {
 	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
 	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
@@ -1124,10 +1142,12 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 #endif
 	default: BUG();
 	}
+	emulator_put_fpu();
 }
 
 static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 {
+	emulator_get_fpu();
 	switch (reg) {
 	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
 	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
@@ -1139,10 +1159,12 @@ static void read_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
 	default: BUG();
 	}
+	emulator_put_fpu();
 }
 
 static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 {
+	emulator_get_fpu();
 	switch (reg) {
 	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
 	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
@@ -1154,6 +1176,7 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg)
 	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
 	default: BUG();
 	}
+	emulator_put_fpu();
 }
 
 static int em_fninit(struct x86_emulate_ctxt *ctxt)
@@ -1161,7 +1184,9 @@ static int em_fninit(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
+	emulator_get_fpu();
 	asm volatile("fninit");
+	emulator_put_fpu();
 	return X86EMUL_CONTINUE;
 }
 
@@ -1172,7 +1197,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
+	emulator_get_fpu();
 	asm volatile("fnstcw %0": "+m"(fcw));
+	emulator_put_fpu();
 
 	ctxt->dst.val = fcw;
 
@@ -1186,7 +1213,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
+	emulator_get_fpu();
 	asm volatile("fnstsw %0": "+m"(fsw));
+	emulator_put_fpu();
 
 	ctxt->dst.val = fsw;
 
@@ -4092,8 +4121,12 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
+	emulator_get_fpu();
+
 	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
 
+	emulator_put_fpu();
+
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -4136,6 +4169,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
+	emulator_get_fpu();
+
 	if (size < __fxstate_size(16)) {
 		rc = fxregs_fixup(&fx_state, size);
 		if (rc != X86EMUL_CONTINUE)
@@ -4151,6 +4186,8 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
 
 out:
+	emulator_put_fpu();
+
 	return rc;
 }
 
@@ -5158,6 +5195,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	ctxt->fetch.ptr = ctxt->fetch.data;
 	ctxt->fetch.end = ctxt->fetch.data + insn_len;
 	ctxt->opcode_len = 1;
+	ctxt->intercept = x86_intercept_none;
 	if (insn_len > 0)
 		memcpy(ctxt->fetch.data, insn, insn_len);
 	else {
@@ -5210,16 +5248,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 			ctxt->ad_bytes = def_ad_bytes ^ 6;
 			break;
 		case 0x26:	/* ES override */
+			has_seg_override = true;
+			ctxt->seg_override = VCPU_SREG_ES;
+			break;
 		case 0x2e:	/* CS override */
+			has_seg_override = true;
+			ctxt->seg_override = VCPU_SREG_CS;
+			break;
 		case 0x36:	/* SS override */
+			has_seg_override = true;
+			ctxt->seg_override = VCPU_SREG_SS;
+			break;
 		case 0x3e:	/* DS override */
 			has_seg_override = true;
-			ctxt->seg_override = (ctxt->b >> 3) & 3;
+			ctxt->seg_override = VCPU_SREG_DS;
 			break;
 		case 0x64:	/* FS override */
+			has_seg_override = true;
+			ctxt->seg_override = VCPU_SREG_FS;
+			break;
 		case 0x65:	/* GS override */
 			has_seg_override = true;
-			ctxt->seg_override = ctxt->b & 7;
+			ctxt->seg_override = VCPU_SREG_GS;
 			break;
 		case 0x40 ... 0x4f: /* REX */
 			if (mode != X86EMUL_MODE_PROT64)
@@ -5303,10 +5353,15 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 		}
 		break;
 	case Escape:
-		if (ctxt->modrm > 0xbf)
-			opcode = opcode.u.esc->high[ctxt->modrm - 0xc0];
-		else
+		if (ctxt->modrm > 0xbf) {
+			size_t size = ARRAY_SIZE(opcode.u.esc->high);
+			u32 index = array_index_nospec(
+				ctxt->modrm - 0xc0, size);
+
+			opcode = opcode.u.esc->high[index];
+		} else {
 			opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
+		}
 		break;
 	case InstrDual:
 		if ((ctxt->modrm >> 6) == 3)
@@ -5448,7 +5503,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 
+	emulator_get_fpu();
 	rc = asm_safe("fwait");
+	emulator_put_fpu();
 
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return emulate_exception(ctxt, MF_VECTOR, 0, false);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 23ff65504d7e..26408434b9bc 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -809,11 +809,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
 				     u32 index, u64 *pdata)
 {
 	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+	size_t size = ARRAY_SIZE(hv->hv_crash_param);
 
-	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+	if (WARN_ON_ONCE(index >= size))
 		return -EINVAL;
 
-	*pdata = hv->hv_crash_param[index];
+	*pdata = hv->hv_crash_param[array_index_nospec(index, size)];
 	return 0;
 }
 
@@ -852,11 +853,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
 				     u32 index, u64 data)
 {
 	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+	size_t size = ARRAY_SIZE(hv->hv_crash_param);
 
-	if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param)))
+	if (WARN_ON_ONCE(index >= size))
 		return -EINVAL;
 
-	hv->hv_crash_param[index] = data;
+	hv->hv_crash_param[array_index_nospec(index, size)] = data;
 	return 0;
 }
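Several of the KVM hunks above and below use array_index_nospec(). The branch-free clamp it relies on can be modeled in userspace as follows — note this shows only the arithmetic; the real helper is also a speculation barrier, which plain C cannot reproduce:

#include <stdio.h>

/* Generic branch-free clamp mask: ~0UL if index < size, else 0. */
static unsigned long mask_nospec(unsigned long index, unsigned long size)
{
	return ~(long)(index | (size - 1 - index)) >> (8 * sizeof(long) - 1);
}

int main(void)
{
	unsigned long size = 4;

	for (unsigned long idx = 0; idx < 6; idx++) {
		unsigned long safe = idx & mask_nospec(idx, size);

		/* Out-of-range indexes collapse to 0 instead of escaping. */
		printf("idx=%lu -> safe=%lu mask=%#lx\n",
		       idx, safe, mask_nospec(idx, size));
	}
	return 0;
}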
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 8b38bb4868a6..629a09ca9860 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s,
 	switch (addr) {
 	case 0x20:
 	case 0x21:
+		pic_lock(s);
+		pic_ioport_write(&s->pics[0], addr, data);
+		pic_unlock(s);
+		break;
 	case 0xa0:
 	case 0xa1:
 		pic_lock(s);
-		pic_ioport_write(&s->pics[addr >> 7], addr, data);
+		pic_ioport_write(&s->pics[1], addr, data);
 		pic_unlock(s);
 		break;
 	case 0x4d0:
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 9fd2dd89a1c5..8aa58727045e 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -68,13 +69,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 	default:
 		{
 		u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
-		u64 redir_content;
+		u64 redir_content = ~0ULL;
 
-		if (redir_index < IOAPIC_NUM_PINS)
-			redir_content =
-				ioapic->redirtbl[redir_index].bits;
-		else
-			redir_content = ~0ULL;
+		if (redir_index < IOAPIC_NUM_PINS) {
+			u32 index = array_index_nospec(
+				redir_index, IOAPIC_NUM_PINS);
+
+			redir_content = ioapic->redirtbl[index].bits;
+		}
 
 		result = (ioapic->ioregsel & 0x1) ?
 			(redir_content >> 32) & 0xffffffff :
@@ -292,6 +294,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 
 		if (index >= IOAPIC_NUM_PINS)
 			return;
+		index = array_index_nospec(index, IOAPIC_NUM_PINS);
 		e = &ioapic->redirtbl[index];
 		mask_before = e->fields.mask;
 		/* Preserve read-only fields */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8ecd48d31800..5ddcaacef291 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -416,7 +416,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 
 			kvm_set_msi_irq(vcpu->kvm, entry, &irq);
 
-			if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
+			if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0,
 						irq.dest_id, irq.dest_mode))
 				__set_bit(irq.vector, ioapic_handled_vectors);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cf9177b4a07f..a1daebe2a60f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -630,9 +630,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 {
 	u8 val;
-	if (pv_eoi_get_user(vcpu, &val) < 0)
+	if (pv_eoi_get_user(vcpu, &val) < 0) {
 		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+		return false;
+	}
 	return val & 0x1;
 }
 
@@ -1049,11 +1051,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 				       apic->regs + APIC_TMR);
 		}
 
-		if (vcpu->arch.apicv_active)
-			kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
-		else {
+		if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) {
 			kvm_lapic_set_irr(vector, apic);
-
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
 		}
@@ -1151,7 +1150,7 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
 		if (!kvm_apic_present(vcpu))
 			continue;
 		if (!kvm_apic_match_dest(vcpu, NULL,
-					irq->delivery_mode,
+					irq->shorthand,
 					irq->dest_id, irq->dest_mode))
 			continue;
@@ -1963,15 +1962,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_LVTTHMR:
 	case APIC_LVTPC:
 	case APIC_LVT1:
-	case APIC_LVTERR:
+	case APIC_LVTERR: {
 		/* TODO: Check vector */
+		size_t size;
+		u32 index;
+
 		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
-
-		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
+		size = ARRAY_SIZE(apic_lvt_mask);
+		index = array_index_nospec(
+				(reg - APIC_LVTT) >> 4, size);
+		val &= apic_lvt_mask[index];
 		kvm_lapic_set_reg(apic, reg, val);
 		break;
+	}
 
 	case APIC_LVTT:
 		if (!kvm_apic_sw_enabled(apic))
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f92b40d798c..d21b69bbd6f4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -418,22 +418,24 @@
  * requires a full MMU zap). The flag is instead explicitly queried when
  * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(18, 0)
+#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(17, 0)
 
 #define MMIO_SPTE_GEN_LOW_START		3
 #define MMIO_SPTE_GEN_LOW_END		11
 #define MMIO_SPTE_GEN_LOW_MASK		GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
 						    MMIO_SPTE_GEN_LOW_START)
 
-#define MMIO_SPTE_GEN_HIGH_START	52
-#define MMIO_SPTE_GEN_HIGH_END		61
+#define MMIO_SPTE_GEN_HIGH_START	PT64_SECOND_AVAIL_BITS_SHIFT
+#define MMIO_SPTE_GEN_HIGH_END		62
 #define MMIO_SPTE_GEN_HIGH_MASK		GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
 						    MMIO_SPTE_GEN_HIGH_START)
+
 static u64 generation_mmio_spte_mask(u64 gen)
 {
 	u64 mask;
 
 	WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+	BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
 
 	mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
 	mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
@@ -444,8 +446,6 @@ static u64 get_mmio_spte_generation(u64 spte)
 {
 	u64 gen;
 
-	spte &= ~shadow_mmio_mask;
-
 	gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
 	gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
 	return gen;
@@ -538,16 +538,20 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 static u8 kvm_get_shadow_phys_bits(void)
 {
 	/*
-	 * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
-	 * in CPU detection code, but MKTME treats those reduced bits as
-	 * 'keyID' thus they are not reserved bits. Therefore for MKTME
-	 * we should still return physical address bits reported by CPUID.
+	 * boot_cpu_data.x86_phys_bits is reduced when MKTME or SME are detected
+	 * in CPU detection code, but the processor treats those reduced bits as
+	 * 'keyID' thus they are not reserved bits. Therefore KVM needs to look at
+	 * the physical address bits reported by CPUID.
	 */
-	if (!boot_cpu_has(X86_FEATURE_TME) ||
-	    WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
-		return boot_cpu_data.x86_phys_bits;
+	if (likely(boot_cpu_data.extended_cpuid_level >= 0x80000008))
+		return cpuid_eax(0x80000008) & 0xff;
 
-	return cpuid_eax(0x80000008) & 0xff;
+	/*
+	 * Quite weird to have VMX or SVM but not MAXPHYADDR; probably a VM with
+	 * custom CPUID. Proceed with whatever the kernel found since these features
+	 * aren't virtualizable (SME/SEV also require CPUIDs higher than 0x80000008).
+	 */
+	return boot_cpu_data.x86_phys_bits;
 }
 
 static void kvm_mmu_reset_all_pte_masks(void)
@@ -1282,12 +1286,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return __mmu_gfn_lpage_is_disallowed(gfn, level, slot);
 }
 
-static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
+static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	unsigned long page_size;
 	int i, ret = 0;
 
-	page_size = kvm_host_page_size(kvm, gfn);
+	page_size = kvm_host_page_size(vcpu, gfn);
 
 	for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) {
 		if (page_size >= KVM_HPAGE_SIZE(i))
@@ -1337,7 +1341,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn,
 	if (unlikely(*force_pt_level))
 		return PT_PAGE_TABLE_LEVEL;
 
-	host_level = host_mapping_level(vcpu->kvm, large_gfn);
+	host_level = host_mapping_level(vcpu, large_gfn);
 
 	if (host_level == PT_PAGE_TABLE_LEVEL)
 		return host_level;
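The MMIO SPTE hunks above scatter a generation number across two free bit ranges of a 64-bit SPTE. A standalone pack/unpack model — the field positions follow the hunk, but the shift arithmetic here is my own simplification, not the kernel's exact code:

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) ((~0ULL >> (63 - (h))) & (~0ULL << (l)))

/* Positions from the hunk above; packing arithmetic is a model. */
#define LOW_START  3
#define LOW_BITS   9      /* bits 3..11 of the SPTE */
#define HIGH_START 52     /* high field position, assumed */

static uint64_t pack(uint64_t gen)
{
	uint64_t lo = gen & GENMASK_ULL(LOW_BITS - 1, 0);
	uint64_t hi = gen >> LOW_BITS;

	return (lo << LOW_START) | (hi << HIGH_START);
}

static uint64_t unpack(uint64_t spte)
{
	uint64_t lo = (spte >> LOW_START) & GENMASK_ULL(LOW_BITS - 1, 0);
	uint64_t hi = spte >> HIGH_START;

	return lo | (hi << LOW_BITS);
}

int main(void)
{
	uint64_t gen = 0x2f00f & GENMASK_ULL(17, 0);  /* 18-bit generation */

	printf("gen=%#llx packed=%#llx roundtrip=%#llx\n",
	       (unsigned long long)gen,
	       (unsigned long long)pack(gen),
	       (unsigned long long)unpack(pack(gen)));
	return 0;
}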
*/ -static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, +static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int level, u32 error_code) { struct kvm_shadow_walk_iterator iterator; @@ -3548,7 +3552,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, do { u64 new_spte; - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) + for_each_shadow_entry_lockless(vcpu, cr2_or_gpa, iterator, spte) if (!is_shadow_present_pte(spte) || iterator.level < level) break; @@ -3626,7 +3630,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } while (true); - trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, + trace_fast_page_fault(vcpu, cr2_or_gpa, error_code, iterator.sptep, spte, fault_handled); walk_shadow_page_lockless_end(vcpu); @@ -3634,10 +3638,11 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable); + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable); static int make_mmu_pages_available(struct kvm_vcpu *vcpu); -static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, +static int nonpaging_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, gfn_t gfn, bool prefault) { int r; @@ -3663,16 +3668,16 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1); } - if (fast_page_fault(vcpu, v, level, error_code)) + if (fast_page_fault(vcpu, gpa, level, error_code)) return RET_PF_RETRY; mmu_seq = vcpu->kvm->mmu_notifier_seq; smp_rmb(); - if (try_async_pf(vcpu, prefault, gfn, v, &pfn, write, &map_writable)) + if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable)) return RET_PF_RETRY; - if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + if (handle_abnormal_pfn(vcpu, gpa, gfn, pfn, ACC_ALL, &r)) return r; r = RET_PF_RETRY; @@ -3683,7 +3688,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, goto out_unlock; if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, pfn, + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); @@ -3981,7 +3986,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots); -static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { if (exception) @@ -3989,7 +3994,7 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, return vaddr; } -static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, +static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -4149,13 +4154,14 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } -static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, +static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { - gfn_t gfn = gva >> PAGE_SHIFT; + gfn_t gfn = gpa >> PAGE_SHIFT; int r; - pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); + /* Note, paging is disabled, ergo gva == gpa. 
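+ * (The pgprintk below deliberately keeps the "gva" wording: with
+ * CR0.PG=0 the two values are identical, only the type changed.)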
*/ + pgprintk("%s: gva %lx error %x\n", __func__, gpa, error_code); if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; @@ -4167,11 +4173,12 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); - return nonpaging_map(vcpu, gva & PAGE_MASK, + return nonpaging_map(vcpu, gpa & PAGE_MASK, error_code, gfn, prefault); } -static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) +static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + gfn_t gfn) { struct kvm_arch_async_pf arch; @@ -4180,11 +4187,13 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); - return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); + return kvm_setup_async_pf(vcpu, cr2_or_gpa, + kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, - gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) + gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, + bool *writable) { struct kvm_memory_slot *slot; bool async; @@ -4204,12 +4213,12 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, return false; /* *pfn has correct page already */ if (!prefault && kvm_can_do_async_pf(vcpu)) { - trace_kvm_try_async_get_page(gva, gfn); + trace_kvm_try_async_get_page(cr2_or_gpa, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { - trace_kvm_async_pf_doublefault(gva, gfn); + trace_kvm_async_pf_doublefault(cr2_or_gpa, gfn); kvm_make_request(KVM_REQ_APF_HALT, vcpu); return true; - } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn)) + } else if (kvm_arch_setup_async_pf(vcpu, cr2_or_gpa, gfn)) return true; } @@ -4222,6 +4231,12 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, { int r = 1; +#ifndef CONFIG_X86_64 + /* A 64-bit CR2 should be impossible on 32-bit KVM. */ + if (WARN_ON_ONCE(fault_address >> 32)) + return -EFAULT; +#endif + vcpu->arch.l1tf_flush_l1d = true; switch (vcpu->arch.apf.host_apf_reason) { default: @@ -4259,7 +4274,7 @@ check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level) return kvm_mtrr_check_gfn_range_consistency(vcpu, gfn, page_num); } -static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, +static int tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, bool prefault) { kvm_pfn_t pfn; @@ -5516,7 +5531,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) return 0; } -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len) { int r, emulation_type = 0; @@ -5525,18 +5540,18 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, /* With shadow page tables, fault_address contains a GVA or nGPA. 
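* With TDP (direct_map) it instead holds an L1 GPA, hence the
* cr2_or_gpa naming: an EPT violation, for example, hands the
* faulting GPA to this function directly, no GVA involved.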
*/ if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; - vcpu->arch.gpa_val = cr2; + vcpu->arch.gpa_val = cr2_or_gpa; } r = RET_PF_INVALID; if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, cr2, direct); + r = handle_mmio_page_fault(vcpu, cr2_or_gpa, direct); if (r == RET_PF_EMULATE) goto emulate; } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu->page_fault(vcpu, cr2, + r = vcpu->arch.mmu->page_fault(vcpu, cr2_or_gpa, lower_32_bits(error_code), false); WARN_ON(r == RET_PF_INVALID); @@ -5556,7 +5571,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, */ if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { - kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa)); return 1; } @@ -5571,7 +5586,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * explicitly shadowing L1's page tables, i.e. unprotecting something * for L1 isn't going to magically fix whatever issue cause L2 to fail. */ - if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu)) + if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu)) emulation_type = EMULTYPE_ALLOW_RETRY; emulate: /* @@ -5586,7 +5601,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, return 1; } - return x86_emulate_instruction(vcpu, cr2, emulation_type, insn, + return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn, insn_len); } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); @@ -6249,7 +6264,7 @@ static void kvm_set_mmio_spte_mask(void) * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) + if (shadow_phys_bits == 52) mask &= ~1ull; kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 97b21e7fd013..4e3f137ffa8c 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -33,7 +33,7 @@ #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 - #define PT_MAX_FULL_LEVELS 4 + #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL #define CMPXCHG cmpxchg #else #define CMPXCHG cmpxchg64 @@ -291,11 +291,11 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte) } /* - * Fetch a guest pte for a guest virtual address + * Fetch a guest pte for a guest virtual address, or for an L2's GPA. */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, u32 access) + gpa_t addr, u32 access) { int ret; pt_element_t pte; @@ -496,7 +496,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, u32 access) + struct kvm_vcpu *vcpu, gpa_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); @@ -611,7 +611,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, * If the guest tries to write a write-protected page, we need to * emulate this operation, return 1 to indicate this case. 
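* (Here "1" is RET_PF_EMULATE; FNAME(page_fault) simply propagates
* this return value to the common MMU code.)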
*/ -static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, +static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr, struct guest_walker *gw, int write_fault, int hlevel, kvm_pfn_t pfn, bool map_writable, bool prefault, @@ -765,7 +765,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, * Returns: 1 if we need to emulate the instruction, 0 otherwise, or * a negative value on error. */ -static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, +static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, bool prefault) { int write_fault = error_code & PFERR_WRITE_MASK; @@ -945,18 +945,19 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa) spin_unlock(&vcpu->kvm->mmu_lock); } -static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, +/* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */ +static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access, struct x86_exception *exception) { struct guest_walker walker; gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); + r = FNAME(walk_addr)(&walker, vcpu, addr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); - gpa |= vaddr & ~PAGE_MASK; + gpa |= addr & ~PAGE_MASK; } else if (exception) *exception = walker.fault; @@ -964,7 +965,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, } #if PTTYPE != PTTYPE_EPT -static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, +/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */ +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr, u32 access, struct x86_exception *exception) { @@ -972,6 +974,11 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; +#ifndef CONFIG_X86_64 + /* A 64-bit GVA should be impossible on 32-bit KVM. 
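+ * (Sanity check only: gpa_t is 64-bit even on 32-bit builds, so the
+ * walk below proceeds with the full value either way; the WARN just
+ * makes the impossible case visible.)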
*/ + WARN_ON_ONCE(vaddr >> 32); +#endif + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); if (r) { diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 7ca8831c7d1a..3c6522b84ff1 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -249,13 +249,13 @@ TRACE_EVENT( TRACE_EVENT( fast_page_fault, - TP_PROTO(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + TP_PROTO(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u32 error_code, u64 *sptep, u64 old_spte, bool retry), - TP_ARGS(vcpu, gva, error_code, sptep, old_spte, retry), + TP_ARGS(vcpu, cr2_or_gpa, error_code, sptep, old_spte, retry), TP_STRUCT__entry( __field(int, vcpu_id) - __field(gva_t, gva) + __field(gpa_t, cr2_or_gpa) __field(u32, error_code) __field(u64 *, sptep) __field(u64, old_spte) @@ -265,7 +265,7 @@ TRACE_EVENT( TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->gva = gva; + __entry->cr2_or_gpa = cr2_or_gpa; __entry->error_code = error_code; __entry->sptep = sptep; __entry->old_spte = old_spte; @@ -273,9 +273,9 @@ TRACE_EVENT( __entry->retry = retry; ), - TP_printk("vcpu %d gva %lx error_code %s sptep %p old %#llx" + TP_printk("vcpu %d gva %llx error_code %s sptep %p old %#llx" " new %llx spurious %d fixed %d", __entry->vcpu_id, - __entry->gva, __print_flags(__entry->error_code, "|", + __entry->cr2_or_gpa, __print_flags(__entry->error_code, "|", kvm_mmu_trace_pferr_flags), __entry->sptep, __entry->old_spte, __entry->new_spte, __spte_satisfied(old_spte), __spte_satisfied(new_spte) diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index 25ce3edd1872..7f0059aa30e1 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -192,11 +192,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) break; case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: *seg = 1; - *unit = msr - MSR_MTRRfix16K_80000; + *unit = array_index_nospec( + msr - MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); break; case MSR_MTRRfix4K_C0000 ... 
MSR_MTRRfix4K_F8000: *seg = 2; - *unit = msr - MSR_MTRRfix4K_C0000; + *unit = array_index_nospec( + msr - MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); break; default: return false; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7ebb62326c14..13332984b6d5 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_PMU_H #define __KVM_X86_PMU_H +#include + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) @@ -102,8 +104,12 @@ static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu, static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, u32 base) { - if (msr >= base && msr < base + pmu->nr_arch_gp_counters) - return &pmu->gp_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_gp_counters); + + return &pmu->gp_counters[index]; + } return NULL; } @@ -113,8 +119,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) { int base = MSR_CORE_PERF_FIXED_CTR0; - if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) - return &pmu->fixed_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_fixed_counters); + + return &pmu->fixed_counters[index]; + } return NULL; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 122d4ce3b1ab..2b3d8feec313 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1307,6 +1307,47 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) } } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? 
rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1361,6 +1402,8 @@ static __init int svm_hardware_setup(void) } } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -5160,8 +5203,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -5173,6 +5219,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) put_cpu(); } else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) @@ -6001,6 +6049,11 @@ static bool svm_has_wbinvd_exit(void) return true; } +static bool svm_pku_supported(void) +{ + return false; +} + #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -7341,6 +7394,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .xsaves_supported = svm_xsaves_supported, .umip_emulated = svm_umip_emulated, .pt_supported = svm_pt_supported, + .pku_supported = svm_pku_supported, .set_supported_cpuid = svm_set_supported_cpuid, diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 7aa69716d516..f486e2606247 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -12,6 +12,7 @@ extern bool __read_mostly enable_ept; extern bool __read_mostly enable_unrestricted_guest; extern bool __read_mostly enable_ept_ad_bits; extern bool __read_mostly enable_pml; +extern bool __read_mostly enable_apicv; extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 @@ -145,6 +146,11 @@ static inline bool vmx_umip_emulated(void) SECONDARY_EXEC_DESC; } +static inline bool vmx_pku_supported(void) +{ + return boot_cpu_has(X86_FEATURE_PKU); +} + static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 4aea7d304beb..54a1a727249d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -234,7 +234,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) return; kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); - vmx->nested.hv_evmcs_vmptr = -1ull; + vmx->nested.hv_evmcs_vmptr = 0; vmx->nested.hv_evmcs = NULL; } @@ -1932,7 +1932,8 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) return 1; - if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { + if (unlikely(!vmx->nested.hv_evmcs || + evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { if (!vmx->nested.hv_evmcs) vmx->nested.current_vmptr = -1ull; @@ -3583,6 +3584,33 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu, nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual); } +/* + * Returns true if a debug trap is pending delivery. + * + * In KVM, debug traps bear an exception payload. As such, the class of a #DB + * exception may be inferred from the presence of an exception payload. 
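+ * Debug faults (e.g. instruction breakpoints) are injected without
+ * a payload and are therefore not treated as pending traps here.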
+ */ +static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.exception.pending && + vcpu->arch.exception.nr == DB_VECTOR && + vcpu->arch.exception.payload; +} + +/* + * Certain VM-exits set the 'pending debug exceptions' field to indicate a + * recognized #DB (data or single-step) that has yet to be delivered. Since KVM + * represents these debug traps with a payload that is said to be compatible + * with the 'pending debug exceptions' field, write the payload to the VMCS + * field if a VM-exit is delivered before the debug trap. + */ +static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) +{ + if (vmx_pending_dbg_trap(vcpu)) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vcpu->arch.exception.payload); +} + static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3595,6 +3623,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) test_bit(KVM_APIC_INIT, &apic->pending_events)) { if (block_nested_events) return -EBUSY; + nested_vmx_update_pending_dbg(vcpu); clear_bit(KVM_APIC_INIT, &apic->pending_events); nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0); return 0; @@ -4753,32 +4782,28 @@ static int handle_vmread(struct kvm_vcpu *vcpu) { unsigned long field; u64 field_value; + struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); int len; gva_t gva = 0; - struct vmcs12 *vmcs12; + struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) + : get_vmcs12(vcpu); struct x86_exception e; short offset; if (!nested_vmx_check_permission(vcpu)) return 1; - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) + /* + * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * any VMREAD sets the ALU flags for VMfailInvalid. + */ + if (vmx->nested.current_vmptr == -1ull || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu)) - vmcs12 = get_vmcs12(vcpu); - else { - /* - * When vmcs->vmcs_link_pointer is -1ull, any VMREAD - * to shadowed-field sets the ALU flags for VMfailInvalid. - */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) - return nested_vmx_failInvalid(vcpu); - vmcs12 = get_shadow_vmcs12(vcpu); - } - /* Decode instruction info and find the field to read */ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); @@ -4807,8 +4832,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) vmx_instruction_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) + if (kvm_write_guest_virt_system(vcpu, gva, &field_value, len, &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } return nested_vmx_succeed(vcpu); @@ -4855,13 +4882,20 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) */ u64 field_value = 0; struct x86_exception e; - struct vmcs12 *vmcs12; + struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) + : get_vmcs12(vcpu); short offset; if (!nested_vmx_check_permission(vcpu)) return 1; - if (vmx->nested.current_vmptr == -1ull) + /* + * In VMX non-root operation, when the VMCS-link pointer is -1ull, + * any VMWRITE sets the ALU flags for VMfailInvalid. 
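+ * This mirrors the VMREAD path above; the check must run before the
+ * field is decoded so that VMfailInvalid takes precedence over
+ * VMfailValid(VMXERR_UNSUPPORTED_VMCS_COMPONENT).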
+ */ + if (vmx->nested.current_vmptr == -1ull || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == -1ull)) return nested_vmx_failInvalid(vcpu); if (vmx_instruction_info & (1u << 10)) @@ -4880,6 +4914,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + + offset = vmcs_field_to_offset(field); + if (offset < 0) + return nested_vmx_failValid(vcpu, + VMXERR_UNSUPPORTED_VMCS_COMPONENT); + /* * If the vCPU supports "VMWRITE to any supported field in the * VMCS," then the "read-only" fields are actually read/write. @@ -4889,29 +4929,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); - if (!is_guest_mode(vcpu)) { - vmcs12 = get_vmcs12(vcpu); - - /* - * Ensure vmcs12 is up-to-date before any VMWRITE that dirties - * vmcs12, else we may crush a field or consume a stale value. - */ - if (!is_shadow_field_rw(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - } else { - /* - * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE - * to shadowed-field sets the ALU flags for VMfailInvalid. - */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) - return nested_vmx_failInvalid(vcpu); - vmcs12 = get_shadow_vmcs12(vcpu); - } - - offset = vmcs_field_to_offset(field); - if (offset < 0) - return nested_vmx_failValid(vcpu, - VMXERR_UNSUPPORTED_VMCS_COMPONENT); + /* + * Ensure vmcs12 is up-to-date before any VMWRITE that dirties + * vmcs12, else we may crush a field or consume a stale value. + */ + if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); /* * Some Intel CPUs intentionally drop the reserved bits of the AR byte @@ -5274,24 +5297,17 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) return 1; } - -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. + */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; @@ -5318,6 +5334,24 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, return false; } +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned short port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Return 1 if we should exit from L2 to L1 to handle an MSR access access, * rather than handle it ourselves in L0. I.e., check whether L1 expressed @@ -5938,8 +5972,7 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void) * bit in the high half is on if the corresponding bit in the control field * may be on. See also vmx_control_verify(). 
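* Roughly, a control value 'val' passes validation iff
* (val & low) == low && (val & ~high) == 0, which is the check
* vmx_control_verify() encodes.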
*/ -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, - bool apicv) +void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) { /* * Note that as a general rule, the high half of the MSRs (bits in @@ -5966,7 +5999,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | - (apicv ? PIN_BASED_POSTED_INTR : 0); + (enable_apicv ? PIN_BASED_POSTED_INTR : 0); msrs->pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index fc874d4ead0f..e1c7faed7df4 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -17,8 +17,7 @@ enum nvmx_vmentry_status { }; void vmx_leave_nested(struct kvm_vcpu *vcpu); -void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, - bool apicv); +void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); void nested_vmx_hardware_unsetup(void); __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); @@ -34,6 +33,8 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, u32 vmx_instruction_info, bool wr, int len, gva_t *ret); void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu); +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size); static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 7023138b1cb0..34a3a17bb6d7 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -86,10 +86,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ @@ -130,16 +134,20 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) - return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; *mask &= pmu->counter_bitmask[fixed ? 
KVM_PMC_FIXED : KVM_PMC_GP]; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index e3394c839dea..3e381b31b9a6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -95,7 +95,7 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv = 1; +bool __read_mostly enable_apicv = 1; module_param(enable_apicv, bool, S_IRUGO); /* @@ -2146,6 +2146,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_num_address_ranges))) return 1; + if (is_noncanonical_address(data, vcpu)) + return 1; if (index % 2) vmx->pt_desc.guest.addr_b[index / 2] = data; else @@ -2966,6 +2968,9 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; @@ -3843,24 +3848,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do. 
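* The ON bit acts as a latch: only the first sender after it was
* cleared fires the notification vector, everyone else merely sets
* their bit in the PIR above.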
*/ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* @@ -5902,34 +5912,39 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) } } - if (exit_reason < kvm_vmx_max_exit_handlers - && kvm_vmx_exit_handlers[exit_reason]) { + if (exit_reason >= kvm_vmx_max_exit_handlers) + goto unexpected_vmexit; #ifdef CONFIG_RETPOLINE - if (exit_reason == EXIT_REASON_MSR_WRITE) - return kvm_emulate_wrmsr(vcpu); - else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) - return handle_preemption_timer(vcpu); - else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) - return handle_interrupt_window(vcpu); - else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) - return handle_external_interrupt(vcpu); - else if (exit_reason == EXIT_REASON_HLT) - return kvm_emulate_halt(vcpu); - else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) - return handle_ept_misconfig(vcpu); + if (exit_reason == EXIT_REASON_MSR_WRITE) + return kvm_emulate_wrmsr(vcpu); + else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER) + return handle_preemption_timer(vcpu); + else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT) + return handle_interrupt_window(vcpu); + else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + return handle_external_interrupt(vcpu); + else if (exit_reason == EXIT_REASON_HLT) + return kvm_emulate_halt(vcpu); + else if (exit_reason == EXIT_REASON_EPT_MISCONFIG) + return handle_ept_misconfig(vcpu); #endif - return kvm_vmx_exit_handlers[exit_reason](vcpu); - } else { - vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", - exit_reason); - dump_vmcs(); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = + + exit_reason = array_index_nospec(exit_reason, + kvm_vmx_max_exit_handlers); + if (!kvm_vmx_exit_handlers[exit_reason]) + goto unexpected_vmexit; + + return kvm_vmx_exit_handlers[exit_reason](vcpu); + +unexpected_vmexit: + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason); + dump_vmcs(); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON; - vcpu->run->internal.ndata = 1; - vcpu->run->internal.data[0] = exit_reason; - return 0; - } + vcpu->run->internal.ndata = 1; + vcpu->run->internal.data[0] = exit_reason; + return 0; } /* @@ -6793,8 +6808,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (nested) nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, - vmx_capability.ept, - kvm_vcpu_apicv_active(&vmx->vcpu)); + vmx_capability.ept); else memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs)); @@ -6874,8 +6888,7 @@ static int __init vmx_check_processor_compat(void) if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) return -EIO; if (nested) - nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, - enable_apicv); + nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); @@ -7136,6 +7149,40 @@ static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) to_vmx(vcpu)->req_immediate_exit = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = 
info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) @@ -7143,19 +7190,45 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + switch (info->intercept) { /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. */ - if (info->intercept == x86_intercept_rdtscp && - !nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { - ctxt->exception.vector = UD_VECTOR; - ctxt->exception.error_code_valid = false; - return X86EMUL_PROPAGATE_FAULT; - } + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); + + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; /* TODO: check more intercepts... */ - return X86EMUL_CONTINUE; + default: + break; + } + + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 @@ -7737,7 +7810,7 @@ static __init int hardware_setup(void) if (nested) { nested_vmx_setup_ctls_msrs(&vmcs_config.nested, - vmx_capability.ept, enable_apicv); + vmx_capability.ept); r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); if (r) @@ -7870,6 +7943,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, .pt_supported = vmx_pt_supported, + .pku_supported = vmx_pku_supported, .request_immediate_exit = vmx_request_immediate_exit, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf917139de6b..dafb5aff200f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,6 +93,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif +static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; + #define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ #define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ @@ -435,6 +437,14 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) * for #DB exceptions under VMX. 
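* (DR6_RTM is set unconditionally a few lines up, so XORing with a
* payload that has bit 16 set clears DR6.RTM again, matching the
* inverted polarity of that bit in DR6.)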
*/ vcpu->arch.dr6 ^= payload & DR6_RTM; + + /* + * The #DB payload is defined as compatible with the 'pending + * debug exceptions' field under VMX, not DR6. While bit 12 is + * defined in the 'pending debug exceptions' field (enabled + * breakpoint), it is reserved and must be zero in DR6. + */ + vcpu->arch.dr6 &= ~BIT(12); break; case PF_VECTOR: vcpu->arch.cr2 = payload; @@ -879,9 +889,38 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); +static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) +{ + u64 reserved_bits = CR4_RESERVED_BITS; + + if (!cpu_has(c, X86_FEATURE_XSAVE)) + reserved_bits |= X86_CR4_OSXSAVE; + + if (!cpu_has(c, X86_FEATURE_SMEP)) + reserved_bits |= X86_CR4_SMEP; + + if (!cpu_has(c, X86_FEATURE_SMAP)) + reserved_bits |= X86_CR4_SMAP; + + if (!cpu_has(c, X86_FEATURE_FSGSBASE)) + reserved_bits |= X86_CR4_FSGSBASE; + + if (!cpu_has(c, X86_FEATURE_PKU)) + reserved_bits |= X86_CR4_PKE; + + if (!cpu_has(c, X86_FEATURE_LA57) && + !(cpuid_ecx(0x7) & bit(X86_FEATURE_LA57))) + reserved_bits |= X86_CR4_LA57; + + if (!cpu_has(c, X86_FEATURE_UMIP) && !kvm_x86_ops->umip_emulated()) + reserved_bits |= X86_CR4_UMIP; + + return reserved_bits; +} + static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - if (cr4 & CR4_RESERVED_BITS) + if (cr4 & cr4_reserved_bits) return -EINVAL; if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE)) @@ -1047,9 +1086,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -1086,9 +1127,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 
3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ @@ -1545,6 +1588,8 @@ struct pvclock_clock { u64 mask; u32 mult; u32 shift; + u64 base_cycles; + u64 offset; }; struct pvclock_gtod_data { @@ -1553,11 +1598,8 @@ struct pvclock_gtod_data { struct pvclock_clock clock; /* extract of a clocksource struct */ struct pvclock_clock raw_clock; /* extract of a clocksource struct */ - u64 boot_ns_raw; - u64 boot_ns; - u64 nsec_base; + ktime_t offs_boot; u64 wall_time_sec; - u64 monotonic_raw_nsec; }; static struct pvclock_gtod_data pvclock_gtod_data; @@ -1565,10 +1607,6 @@ static struct pvclock_gtod_data pvclock_gtod_data; static void update_pvclock_gtod(struct timekeeper *tk) { struct pvclock_gtod_data *vdata = &pvclock_gtod_data; - u64 boot_ns, boot_ns_raw; - - boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot)); - boot_ns_raw = ktime_to_ns(ktime_add(tk->tkr_raw.base, tk->offs_boot)); write_seqcount_begin(&vdata->seq); @@ -1578,23 +1616,35 @@ static void update_pvclock_gtod(struct timekeeper *tk) vdata->clock.mask = tk->tkr_mono.mask; vdata->clock.mult = tk->tkr_mono.mult; vdata->clock.shift = tk->tkr_mono.shift; + vdata->clock.base_cycles = tk->tkr_mono.xtime_nsec; + vdata->clock.offset = tk->tkr_mono.base; vdata->raw_clock.vclock_mode = tk->tkr_raw.clock->archdata.vclock_mode; vdata->raw_clock.cycle_last = tk->tkr_raw.cycle_last; vdata->raw_clock.mask = tk->tkr_raw.mask; vdata->raw_clock.mult = tk->tkr_raw.mult; vdata->raw_clock.shift = tk->tkr_raw.shift; - - vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->tkr_mono.xtime_nsec; + vdata->raw_clock.base_cycles = tk->tkr_raw.xtime_nsec; + vdata->raw_clock.offset = tk->tkr_raw.base; vdata->wall_time_sec = tk->xtime_sec; - vdata->boot_ns_raw = boot_ns_raw; - vdata->monotonic_raw_nsec = tk->tkr_raw.xtime_nsec; + vdata->offs_boot = tk->offs_boot; write_seqcount_end(&vdata->seq); } + +static s64 get_kvmclock_base_ns(void) +{ + /* Count up from boot time, but with the frequency of the raw clock. */ + return ktime_to_ns(ktime_add(ktime_get_raw(), pvclock_gtod_data.offs_boot)); +} +#else +static s64 get_kvmclock_base_ns(void) +{ + /* Master clock not used, so we can just use CLOCK_BOOTTIME. */ + return ktime_get_boottime_ns(); +} #endif void kvm_set_pending_timer(struct kvm_vcpu *vcpu) @@ -1608,7 +1658,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) int version; int r; struct pvclock_wall_clock wc; - struct timespec64 boot; + u64 wall_nsec; if (!wall_clock) return; @@ -1628,17 +1678,12 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) /* * The guest calculates current wall clock time by adding * system time (updated by kvm_guest_time_update below) to the - * wall clock specified here. guest system time equals host - * system time for us, thus we must fill in host boot time here. + * wall clock specified here. We do the reverse here. 
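+ * In short: wall_nsec = CLOCK_REALTIME - kvmclock, so that the
+ * guest's wall_clock + kvmclock computation lands back on host
+ * real time.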
*/ - getboottime64(&boot); + wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm); - if (kvm->arch.kvmclock_offset) { - struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset); - boot = timespec64_sub(boot, ts); - } - wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */ - wc.nsec = boot.tv_nsec; + wc.nsec = do_div(wall_nsec, 1000000000); + wc.sec = (u32)wall_nsec; /* overflow in 2106 guest time */ wc.version = version; kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); @@ -1886,7 +1931,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boottime_ns(); + ns = get_kvmclock_base_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2061,10 +2106,10 @@ static int do_monotonic_raw(s64 *t, u64 *tsc_timestamp) do { seq = read_seqcount_begin(>od->seq); - ns = gtod->monotonic_raw_nsec; + ns = gtod->raw_clock.base_cycles; ns += vgettsc(>od->raw_clock, tsc_timestamp, &mode); - ns >>= gtod->clock.shift; - ns += gtod->boot_ns_raw; + ns >>= gtod->raw_clock.shift; + ns += ktime_to_ns(ktime_add(gtod->raw_clock.offset, gtod->offs_boot)); } while (unlikely(read_seqcount_retry(>od->seq, seq))); *t = ns; @@ -2081,7 +2126,7 @@ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp) do { seq = read_seqcount_begin(>od->seq); ts->tv_sec = gtod->wall_time_sec; - ns = gtod->nsec_base; + ns = gtod->clock.base_cycles; ns += vgettsc(>od->clock, tsc_timestamp, &mode); ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); @@ -2224,7 +2269,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boottime_ns() + ka->kvmclock_offset; + return get_kvmclock_base_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2240,7 +2285,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boottime_ns() + ka->kvmclock_offset; + ret = get_kvmclock_base_ns() + ka->kvmclock_offset; put_cpu(); @@ -2339,7 +2384,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boottime_ns(); + kernel_ns = get_kvmclock_base_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -2379,6 +2424,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_guest_tsc = tsc_timestamp; + WARN_ON(vcpu->hv_clock.system_time < 0); /* If the host uses TSC clocksource, then it is stable */ pvclock_flags = 0; @@ -2485,7 +2531,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2581,45 +2630,47 @@ static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) static void record_steal_time(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val 
& KVM_MSR_ENABLED)) return; - if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) + /* -EAGAIN is returned in atomic context so we can just return. */ + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, + &map, &vcpu->arch.st.cache, false)) return; + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + /* * Doing a TLB flush here, on the guest's behalf, can avoid * expensive IPIs. */ trace_kvm_pv_tlb_flush(vcpu->vcpu_id, - vcpu->arch.st.steal.preempted & KVM_VCPU_FLUSH_TLB); - if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB) + st->preempted & KVM_VCPU_FLUSH_TLB); + if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) kvm_vcpu_flush_tlb(vcpu, false); - if (vcpu->arch.st.steal.version & 1) - vcpu->arch.st.steal.version += 1; /* first time write, random junk */ + vcpu->arch.st.preempted = 0; - vcpu->arch.st.steal.version += 1; + if (st->version & 1) + st->version += 1; /* first time write, random junk */ - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + st->version += 1; smp_wmb(); - vcpu->arch.st.steal.steal += current->sched_info.run_delay - + st->steal += current->sched_info.run_delay - vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); - smp_wmb(); - vcpu->arch.st.steal.version += 1; + st->version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false); } int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -2786,11 +2837,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, - data & KVM_STEAL_VALID_BITS, - sizeof(struct kvm_steal_time))) - return 1; - vcpu->arch.st.msr_val = data; if (!(data & KVM_MSR_ENABLED)) @@ -2926,7 +2972,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } @@ -3458,10 +3507,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops->vcpu_load(vcpu, cpu); - fpregs_assert_state_consistent(); - if (test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_return(); - /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -3501,15 +3546,25 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) { + struct kvm_host_map map; + struct kvm_steal_time *st; + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED; + if (vcpu->arch.st.preempted) + return; + + if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map, + &vcpu->arch.st.cache, true)) + return; + + st = map.hva + + offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS); + + st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED; - 
kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, - &vcpu->arch.st.steal.preempted, - offsetof(struct kvm_steal_time, preempted), - sizeof(vcpu->arch.st.steal.preempted)); + kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -6379,11 +6434,11 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) return 1; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool write_fault_to_shadow_pgtable, int emulation_type) { - gpa_t gpa = cr2; + gpa_t gpa = cr2_or_gpa; kvm_pfn_t pfn; if (!(emulation_type & EMULTYPE_ALLOW_RETRY)) @@ -6397,7 +6452,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, * Write permission should be allowed since only * write access need to be emulated. */ - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); /* * If the mapping is invalid in guest, let cpu retry @@ -6454,10 +6509,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, - unsigned long cr2, int emulation_type) + gpa_t cr2_or_gpa, int emulation_type) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - unsigned long last_retry_eip, last_retry_addr, gpa = cr2; + unsigned long last_retry_eip, last_retry_addr, gpa = cr2_or_gpa; last_retry_eip = vcpu->arch.last_retry_eip; last_retry_addr = vcpu->arch.last_retry_addr; @@ -6486,14 +6541,14 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (x86_page_table_writing_insn(ctxt)) return false; - if (ctxt->eip == last_retry_eip && last_retry_addr == cr2) + if (ctxt->eip == last_retry_eip && last_retry_addr == cr2_or_gpa) return false; vcpu->arch.last_retry_eip = ctxt->eip; - vcpu->arch.last_retry_addr = cr2; + vcpu->arch.last_retry_addr = cr2_or_gpa; if (!vcpu->arch.mmu->direct_map) - gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2_or_gpa, NULL); kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); @@ -6639,11 +6694,8 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) return false; } -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, - int emulation_type, - void *insn, - int insn_len) +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + int emulation_type, void *insn, int insn_len) { int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; @@ -6689,8 +6741,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - if (reexecute_instruction(vcpu, cr2, write_fault_to_spt, - emulation_type)) + if (reexecute_instruction(vcpu, cr2_or_gpa, + write_fault_to_spt, + emulation_type)) return 1; if (ctxt->have_exception) { /* @@ -6724,7 +6777,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; } - if (retry_instruction(ctxt, cr2, emulation_type)) + if (retry_instruction(ctxt, cr2_or_gpa, emulation_type)) return 1; /* this is needed for vmware backdoor interface to work since it @@ -6736,7 +6789,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, restart: /* Save the faulting GPA (cr2) in the address field */ - ctxt->exception.address = cr2; + ctxt->exception.address = cr2_or_gpa; r = x86_emulate_insn(ctxt); @@ -6744,7 +6797,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return 1; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, 
cr2, write_fault_to_spt, + if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt, emulation_type)) return 1; @@ -8198,8 +8251,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) trace_kvm_entry(vcpu->vcpu_id); guest_enter_irqoff(); - /* The preempt notifier should have taken care of the FPU already. */ - WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD)); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); @@ -8471,12 +8525,26 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } +static void kvm_save_current_fpu(struct fpu *fpu) +{ + /* + * If the target FPU state is not resident in the CPU registers, just + * memcpy() from current, else save CPU state directly to the target. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&fpu->state, ¤t->thread.fpu.state, + fpu_kernel_xstate_size); + else + copy_fpregs_to_fpstate(fpu); +} + /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.user_fpu); + kvm_save_current_fpu(vcpu->arch.user_fpu); + /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); @@ -8492,7 +8560,8 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { fpregs_lock(); - copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); + kvm_save_current_fpu(vcpu->arch.guest_fpu); + copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state); fpregs_mark_activate(); @@ -8714,6 +8783,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { vcpu_load(vcpu); + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && @@ -8722,6 +8793,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return 0; } @@ -9085,6 +9158,9 @@ static void fx_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; + struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache; + + kvm_release_pfn(cache->pfn, cache->dirty, cache); kvmclock_reset(vcpu); @@ -9149,13 +9225,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - vcpu->arch.apf.msr_val = 0; - - vcpu_load(vcpu); - kvm_mmu_unload(vcpu); - vcpu_put(vcpu); - - kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_free(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -9347,6 +9417,8 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); + if (kvm_has_tsc_control) { /* * Make sure the user can only configure tsc_khz values that @@ -9518,7 +9590,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); + kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; @@ -9758,11 +9830,18 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) { + struct kvm_vcpu *vcpu; + int i; + /* * 
memslots->generation has been incremented. * mmio generation may have reached its maximum value. */ kvm_mmu_invalidate_mmio_sptes(kvm, gen); + + /* Force re-initialization of steal_time cache */ + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vcpu_kick(vcpu); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -10014,7 +10093,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); + vcpu->arch.mmu->page_fault(vcpu, work->cr2_or_gpa, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) @@ -10127,7 +10206,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_not_present(work->arch.token, work->gva); + trace_kvm_async_pf_not_present(work->arch.token, work->cr2_or_gpa); kvm_add_async_pf_gfn(vcpu, work->arch.gfn); if (kvm_can_deliver_async_pf(vcpu) && @@ -10162,7 +10241,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); - trace_kvm_async_pf_ready(work->arch.token, work->gva); + trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && !apf_get_user(vcpu, &val)) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 29391af8871d..cab5e71f0f0f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -289,7 +289,7 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int page_num); bool kvm_vector_hashing_enabled(void); -int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, +int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 8908c58bd6cd..53adc1762ec0 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -929,7 +929,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 304d31d8cbbc..c494c8c05824 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -189,7 +189,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -216,6 +216,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -318,11 +328,23 @@ static void dump_pagetable(unsigned long address) #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. 
+ */ +} + /* * 64-bit: * diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index b3a2936377b5..e12cc7515ad7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -106,6 +106,19 @@ static unsigned int __ioremap_check_encrypted(struct resource *res) return 0; } +/* + * The EFI runtime services data area is not covered by walk_mem_res(), but must + * be mapped encrypted when SEV is active. + */ +static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc) +{ + if (!sev_active()) + return; + + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) + desc->flags |= IORES_MAP_ENCRYPTED; +} + static int __ioremap_collect_map_flags(struct resource *res, void *arg) { struct ioremap_desc *desc = arg; @@ -124,6 +137,9 @@ static int __ioremap_collect_map_flags(struct resource *res, void *arg) * To avoid multiple resource walks, this function walks resources marked as * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + * + * After that, deal with misc other ranges in __ioremap_check_other() which do + * not fall into the above category. */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, struct ioremap_desc *desc) @@ -135,6 +151,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, memset(desc, 0, sizeof(struct ioremap_desc)); walk_mem_res(start, end, desc, __ioremap_collect_map_flags); + + __ioremap_check_other(addr, desc); } /* diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1b99ad05b117..f42780ba0893 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2215,7 +2215,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; @@ -2224,12 +2224,6 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 38d44f36d5ed..ad4dd3a97753 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -541,7 +541,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -566,14 +565,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -999,16 +1000,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } 
efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -1022,8 +1021,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -1031,12 +1029,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -1056,9 +1053,9 @@ static void __init __efi_enter_virtual_mode(void) } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } efi_free_boot_services(); @@ -1087,6 +1084,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 08ce8177c3af..fe0e647411da 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -316,7 +316,7 @@ void efi_sync_low_kernel_mappings(void) static inline phys_addr_t virt_to_phys_or_null_size(void *va, unsigned long size) { - bool bad_size; + phys_addr_t pa; if (!va) return 0; @@ -324,16 +324,13 @@ virt_to_phys_or_null_size(void *va, unsigned long size) if (virt_addr_valid(va)) return virt_to_phys(va); - /* - * A fully aligned variable on the stack is guaranteed not to - * cross a page bounary. Try to catch strings on the stack by - * checking that 'size' is a power of two. 
- */ - bad_size = size > PAGE_SIZE || !is_power_of_2(size); + pa = slow_virt_to_phys(va); - WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size); + /* check if the object crosses a page boundary */ + if (WARN_ON((pa ^ (pa + size - 1)) & PAGE_MASK)) + return 0; - return slow_virt_to_phys(va); + return pa; } #define virt_to_phys_or_null(addr) \ @@ -392,11 +389,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); @@ -790,6 +788,8 @@ static efi_status_t efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, unsigned long *data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); efi_status_t status; u32 phys_name, phys_vendor, phys_attr; u32 phys_data_size, phys_data; @@ -797,14 +797,19 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_data_size = virt_to_phys_or_null(data_size); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); phys_attr = virt_to_phys_or_null(attr); phys_data = virt_to_phys_or_null_size(data, *data_size); - status = efi_thunk(get_variable, phys_name, phys_vendor, - phys_attr, phys_data_size, phys_data); + if (!phys_name || (data && !phys_data)) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -815,19 +820,25 @@ static efi_status_t efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); u32 phys_name, phys_vendor, phys_data; efi_status_t status; unsigned long flags; spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_name = virt_to_phys_or_null_size(name, efi_name_size(name)); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - /* If data_size is > sizeof(u32) we've got problems */ - status = efi_thunk(set_variable, phys_name, phys_vendor, - attr, data_size, phys_data); + if (!phys_name || !phys_data) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -839,6 +850,8 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, u32 attr, unsigned long data_size, void *data) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); u32 phys_name, phys_vendor, phys_data; efi_status_t status; unsigned long flags; @@ -846,13 +859,17 @@ efi_thunk_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor, if (!spin_trylock_irqsave(&efi_runtime_lock, flags)) return EFI_NOT_READY; + *vnd = *vendor; + phys_name = virt_to_phys_or_null_size(name, 
efi_name_size(name)); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_data = virt_to_phys_or_null_size(data, data_size); - /* If data_size is > sizeof(u32) we've got problems */ - status = efi_thunk(set_variable, phys_name, phys_vendor, - attr, data_size, phys_data); + if (!phys_name || !phys_data) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); spin_unlock_irqrestore(&efi_runtime_lock, flags); @@ -864,21 +881,29 @@ efi_thunk_get_next_variable(unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) { + u8 buf[24] __aligned(8); + efi_guid_t *vnd = PTR_ALIGN((efi_guid_t *)buf, sizeof(*vnd)); efi_status_t status; u32 phys_name_size, phys_name, phys_vendor; unsigned long flags; spin_lock_irqsave(&efi_runtime_lock, flags); + *vnd = *vendor; + phys_name_size = virt_to_phys_or_null(name_size); - phys_vendor = virt_to_phys_or_null(vendor); + phys_vendor = virt_to_phys_or_null(vnd); phys_name = virt_to_phys_or_null_size(name, *name_size); - status = efi_thunk(get_next_variable, phys_name_size, - phys_name, phys_vendor); + if (!phys_name) + status = EFI_INVALID_PARAMETER; + else + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); spin_unlock_irqrestore(&efi_runtime_lock, flags); + *vendor = *vnd; return status; } diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index ae4a41ca19f6..507f4fb88fa7 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -72,6 +72,9 @@ #include #include #include +#ifdef CONFIG_X86_IOPL_IOPERM +#include +#endif #ifdef CONFIG_ACPI #include @@ -837,6 +840,25 @@ static void xen_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } +#ifdef CONFIG_X86_IOPL_IOPERM +static void xen_update_io_bitmap(void) +{ + struct physdev_set_iobitmap iobitmap; + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + + native_tss_update_io_bitmap(); + + iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) + + tss->x86_tss.io_bitmap_base; + if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID) + iobitmap.nr_ports = 0; + else + iobitmap.nr_ports = IO_BITMAP_BITS; + + HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap); +} +#endif + static void xen_io_delay(void) { } @@ -896,14 +918,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; @@ -1046,6 +1069,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_idt_entry = xen_write_idt_entry, .load_sp0 = xen_load_sp0, +#ifdef CONFIG_X86_IOPL_IOPERM + .update_io_bitmap = xen_update_io_bitmap, +#endif .io_delay = xen_io_delay, /* Xen takes care of %gs when switching to usermode for us */ @@ -1205,6 +1231,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason; x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_select = x86_init_noop; x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 
e1419edde2ec..68882b9b8f11 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -332,7 +332,7 @@ static void bfqg_put(struct bfq_group *bfqg) kfree(bfqg); } -static void bfqg_and_blkg_get(struct bfq_group *bfqg) +void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); @@ -610,12 +610,13 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, */ entity = &bfqg->entity; for_each_entity(entity) { - bfqg = container_of(entity, struct bfq_group, entity); - if (bfqg != bfqd->root_group) { - parent = bfqg_parent(bfqg); + struct bfq_group *curr_bfqg = container_of(entity, + struct bfq_group, entity); + if (curr_bfqg != bfqd->root_group) { + parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; - bfq_group_set_parent(bfqg, parent); + bfq_group_set_parent(curr_bfqg, parent); } } @@ -641,6 +642,12 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_entity *entity = &bfqq->entity; + /* + * Get extra reference to prevent bfqq from being freed in + * next possible expire or deactivate. + */ + bfqq->ref++; + /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we @@ -653,7 +660,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); - else if (entity->on_st) + else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(bfqq_group(bfqq)); @@ -670,6 +677,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->in_service_queue && !bfqd->rq_in_driver) bfq_schedule_dispatch(bfqd); + /* release extra ref taken above, bfqq may happen to be freed now */ + bfq_put_queue(bfqq); } /** @@ -705,10 +714,7 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, if (entity->sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, 0); - bfq_log_bfqq(bfqd, async_bfqq, - "bic_change_group: %p %d", - async_bfqq, async_bfqq->ref); - bfq_put_queue(async_bfqq); + bfq_release_process_ref(bfqd, async_bfqq); } } @@ -809,39 +815,53 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. - * @entity: the entity to move. + * @entity: the entity to move, if entity is a leaf; or the parent entity + * of an active leaf entity to move, if entity is not a leaf. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, - struct bfq_entity *entity) + struct bfq_entity *entity, + int ioprio_class) { - struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); + struct bfq_queue *bfqq; + struct bfq_entity *child_entity = entity; + + while (child_entity->my_sched_data) { /* leaf not reached yet */ + struct bfq_sched_data *child_sd = child_entity->my_sched_data; + struct bfq_service_tree *child_st = child_sd->service_tree + + ioprio_class; + struct rb_root *child_active = &child_st->active; + child_entity = bfq_entity_of(rb_first(child_active)); + + if (!child_entity) + child_entity = child_sd->in_service_entity; + } + + bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** - * bfq_reparent_active_entities - move to the root group all active - * entities. + * bfq_reparent_active_queues - move to the root group all active queues. 
* @bfqd: the device data structure with the root group. * @bfqg: the group to move from. - * @st: the service tree with the entities. + * @st: the service tree to start the search from. */ -static void bfq_reparent_active_entities(struct bfq_data *bfqd, - struct bfq_group *bfqg, - struct bfq_service_tree *st) +static void bfq_reparent_active_queues(struct bfq_data *bfqd, + struct bfq_group *bfqg, + struct bfq_service_tree *st, + int ioprio_class) { struct rb_root *active = &st->active; - struct bfq_entity *entity = NULL; - - if (!RB_EMPTY_ROOT(&st->active)) - entity = bfq_entity_of(rb_first(active)); + struct bfq_entity *entity; - for (; entity ; entity = bfq_entity_of(rb_first(active))) - bfq_reparent_leaf_entity(bfqd, entity); + while ((entity = bfq_entity_of(rb_first(active)))) + bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, - bfqg->sched_data.in_service_entity); + bfqg->sched_data.in_service_entity, + ioprio_class); } /** @@ -873,13 +893,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; - /* - * The idle tree may still contain bfq_queues belonging - * to exited task because they never migrated to a different - * cgroup from the one being destroyed now. - */ - bfq_flush_idle_tree(st); - /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding @@ -892,7 +905,20 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) * There is no need to put the sync queues, as the * scheduler has taken no reference. */ - bfq_reparent_active_entities(bfqd, bfqg, st); + bfq_reparent_active_queues(bfqd, bfqg, st, i); + + /* + * The idle tree may still contain bfq_queues + * belonging to exited task because they never + * migrated to a different cgroup from the one being + * destroyed now. In addition, even + * bfq_reparent_active_queues() may happen to add some + * entities to the idle tree. It happens if, in some + * of the calls to bfq_bfqq_move() performed by + * bfq_reparent_active_queues(), the queue to move is + * empty and gets expired. 
+ */ + bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); @@ -1398,6 +1424,10 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) return bfqq->bfqd->root_group; } +void bfqg_and_blkg_get(struct bfq_group *bfqg) {} + +void bfqg_and_blkg_put(struct bfq_group *bfqg) {} + struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index ad4af4aaf2ce..78ba57efd16b 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -427,7 +427,6 @@ void bfq_schedule_dispatch(struct bfq_data *bfqd) } #define bfq_class_idle(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_IDLE) -#define bfq_class_rt(bfqq) ((bfqq)->ioprio_class == IOPRIO_CLASS_RT) #define bfq_sample_valid(samples) ((samples) > 80) @@ -614,6 +613,10 @@ bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->pos_root = NULL; } + /* oom_bfqq does not participate in queue merging */ + if (bfqq == &bfqd->oom_bfqq) + return; + /* * bfqq cannot be merged any longer (see comments in * bfq_setup_cooperator): no point in adding bfqq into the @@ -1056,7 +1059,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, static int bfqq_process_refs(struct bfq_queue *bfqq) { - return bfqq->ref - bfqq->allocated - bfqq->entity.on_st - + return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv - (bfqq->weight_counter != NULL); } @@ -2713,8 +2716,6 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) } } - -static void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* @@ -3444,6 +3445,10 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + return (bfqq->wr_coeff > 1 && (bfqd->wr_busy_queues < bfq_tot_busy_queues(bfqd) || @@ -4077,6 +4082,10 @@ static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, bfqq_sequential_and_IO_bound, idling_boosts_thr; + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) && bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq); @@ -4170,6 +4179,10 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq) struct bfq_data *bfqd = bfqq->bfqd; bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar; + /* No point in idling for bfqq if it won't get requests any longer */ + if (unlikely(!bfqq_process_refs(bfqq))) + return false; + if (unlikely(bfqd->strict_guarantees)) return true; @@ -4810,9 +4823,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) { struct bfq_queue *item; struct hlist_node *n; -#ifdef CONFIG_BFQ_GROUP_IOSCHED struct bfq_group *bfqg = bfqq_group(bfqq); -#endif if (bfqq->bfqd) bfq_log_bfqq(bfqq->bfqd, bfqq, "put_queue: %p %d", @@ -4885,9 +4896,7 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqq->bfqd->last_completed_rq_bfqq = NULL; kmem_cache_free(bfq_pool, bfqq); -#ifdef CONFIG_BFQ_GROUP_IOSCHED bfqg_and_blkg_put(bfqg); -#endif } static void bfq_put_cooperator(struct bfq_queue *bfqq) @@ -5968,6 +5977,8 @@ static void bfq_finish_requeue_request(struct request *rq) } /* + * Removes the association between the current task and bfqq, assuming + * that bic points to the bfq iocontext of the task. 
* Returns NULL if a new bfqq should be allocated, or the old bfqq if this * was the last process referring to that bfqq. */ @@ -6202,20 +6213,28 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) return bfqq; } -static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq) +static void +bfq_idle_slice_timer_body(struct bfq_data *bfqd, struct bfq_queue *bfqq) { - struct bfq_data *bfqd = bfqq->bfqd; enum bfqq_expiration reason; unsigned long flags; spin_lock_irqsave(&bfqd->lock, flags); - bfq_clear_bfqq_wait_request(bfqq); + /* + * Considering that bfqq may be in a race, we should first check + * whether bfqq is in service before doing anything to it. If + * the bfqq under race is not in service, it has already been expired + * through __bfq_bfqq_expire(), and its wait_request flag has + * been cleared in __bfq_bfqd_reset_in_service(). + */ if (bfqq != bfqd->in_service_queue) { spin_unlock_irqrestore(&bfqd->lock, flags); return; } + bfq_clear_bfqq_wait_request(bfqq); + if (bfq_bfqq_budget_timeout(bfqq)) /* * Also here the queue can be safely expired @@ -6260,7 +6279,7 @@ static enum hrtimer_restart bfq_idle_slice_timer(struct hrtimer *timer) * early. */ if (bfqq) - bfq_idle_slice_timer_body(bfqq); + bfq_idle_slice_timer_body(bfqd, bfqq); return HRTIMER_NORESTART; } @@ -6375,10 +6394,10 @@ static void bfq_exit_queue(struct elevator_queue *e) hrtimer_cancel(&bfqd->idle_slice_timer); -#ifdef CONFIG_BFQ_GROUP_IOSCHED /* release oom-queue reference to root group */ bfqg_and_blkg_put(bfqd->root_group); +#ifdef CONFIG_BFQ_GROUP_IOSCHED blkcg_deactivate_policy(bfqd->queue, &blkcg_policy_bfq); #else spin_lock_irq(&bfqd->lock); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 8526f20c53bc..cd224aaf9f52 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -150,7 +150,7 @@ struct bfq_entity { * Flag, true if the entity is on a tree (either the active or * the idle one of its service_tree) or is in service.
*/ - bool on_st; + bool on_st_or_in_serv; /* B-WF2Q+ start and finish timestamps [sectors/weight] */ u64 start, finish; @@ -921,6 +921,7 @@ struct bfq_group { #else struct bfq_group { + struct bfq_entity entity; struct bfq_sched_data sched_data; struct bfq_queue *async_bfqq[2][IOPRIO_BE_NR]; @@ -954,6 +955,7 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); +void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); @@ -984,6 +986,7 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); +void bfqg_and_blkg_get(struct bfq_group *bfqg); void bfqg_and_blkg_put(struct bfq_group *bfqg); #ifdef CONFIG_BFQ_GROUP_IOSCHED diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 05f0bf4a1144..eb0e2a6daabe 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -277,10 +277,7 @@ struct bfq_queue *bfq_entity_to_bfqq(struct bfq_entity *entity) */ static u64 bfq_delta(unsigned long service, unsigned long weight) { - u64 d = (u64)service << WFQ_SERVICE_SHIFT; - - do_div(d, weight); - return d; + return div64_ul((u64)service << WFQ_SERVICE_SHIFT, weight); } /** @@ -536,7 +533,9 @@ static void bfq_get_entity(struct bfq_entity *entity) bfqq->ref++; bfq_log_bfqq(bfqq->bfqd, bfqq, "get_entity: %p %d", bfqq, bfqq->ref); - } + } else + bfqg_and_blkg_get(container_of(entity, struct bfq_group, + entity)); } /** @@ -648,10 +647,16 @@ static void bfq_forget_entity(struct bfq_service_tree *st, { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); - entity->on_st = false; + entity->on_st_or_in_serv = false; st->wsum -= entity->weight; - if (bfqq && !is_in_service) + if (is_in_service) + return; + + if (bfqq) bfq_put_queue(bfqq); + else + bfqg_and_blkg_put(container_of(entity, struct bfq_group, + entity)); } /** @@ -1002,7 +1007,7 @@ static void __bfq_activate_entity(struct bfq_entity *entity, */ bfq_get_entity(entity); - entity->on_st = true; + entity->on_st_or_in_serv = true; } #ifdef CONFIG_BFQ_GROUP_IOSCHED @@ -1168,7 +1173,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree) struct bfq_service_tree *st; bool is_in_service; - if (!entity->on_st) /* entity never activated, or already inactive */ + if (!entity->on_st_or_in_serv) /* + * entity never activated, or + * already inactive + */ return false; /* @@ -1623,7 +1631,7 @@ bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd) * service tree either, then release the service reference to * the queue it represents (taken with bfq_get_entity). 
*/ - if (!in_serv_entity->on_st) { + if (!in_serv_entity->on_st_or_in_serv) { /* * If no process is referencing in_serv_bfqq any * longer, then the service reference may be the only diff --git a/block/blk-flush.c b/block/blk-flush.c index 3f977c517960..5cc775bdb06a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -412,7 +412,7 @@ void blk_insert_flush(struct request *rq) */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); return; } diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 27ca68621137..9a599cc28c29 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1318,7 +1318,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg) return false; /* is something in flight? */ - if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime)) + if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime)) return false; return true; diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index ca22afd47b3d..74cedea56034 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, bool has_sched, struct request *rq) { - /* dispatch flush rq directly */ - if (rq->rq_flags & RQF_FLUSH_SEQ) { - spin_lock(&hctx->lock); - list_add(&rq->queuelist, &hctx->dispatch); - spin_unlock(&hctx->lock); + /* + * dispatch flush and passthrough rq directly + * + * passthrough request has to be added to hctx->dispatch directly. + * For some reason, device may be in one situation which can't + * handle FS request, so STS_RESOURCE is always returned and the + * FS request will be added to hctx->dispatch. However passthrough + * request may be required at that time for fixing the problem. If + * passthrough request is added to scheduler queue, there isn't any + * chance to dispatch it given we prioritize requests in hctx->dispatch. + */ + if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq)) return true; - } if (has_sched) rq->rq_flags |= RQF_SORTED; @@ -391,8 +397,32 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != -1)); - if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) + if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + /* + * Firstly normal IO request is inserted to scheduler queue or + * sw queue, meantime we add flush request to dispatch queue( + * hctx->dispatch) directly and there is at most one in-flight + * flush request for each hw queue, so it doesn't matter to add + * flush request to tail or front of the dispatch queue. + * + * Secondly in case of NCQ, flush request belongs to non-NCQ + * command, and queueing it will fail when there is any + * in-flight normal IO request(NCQ command). When adding flush + * rq to the front of hctx->dispatch, it is easier to introduce + * extra time to flush rq's latency because of S_SCHED_RESTART + * compared with adding to the tail of dispatch queue, then + * chance of flush merge is increased, and less flush requests + * will be issued to controller. It is observed that ~10% time + * is saved in blktests block/004 on disk attached to AHCI/NCQ + * drive when adding flush rq to the front of hctx->dispatch. + * + * Simply queue flush rq to the front of hctx->dispatch so that + * intensive flush workloads can benefit in case of NCQ HW. + */ + at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? 
true : at_head; + blk_mq_request_bypass_insert(rq, at_head, false); goto run; + } if (e && e->type->ops.insert_requests) { LIST_HEAD(list); diff --git a/block/blk-mq.c b/block/blk-mq.c index 323c9cb28066..329df7986bf6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -727,7 +727,7 @@ static void blk_mq_requeue_work(struct work_struct *work) * merge. */ if (rq->rq_flags & RQF_DONTPREP) - blk_mq_request_bypass_insert(rq, false); + blk_mq_request_bypass_insert(rq, false, false); else blk_mq_sched_insert_request(rq, true, false, false); } @@ -1278,7 +1278,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, q->mq_ops->commit_rqs(hctx); spin_lock(&hctx->lock); - list_splice_init(list, &hctx->dispatch); + list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); /* @@ -1629,12 +1629,16 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, * Should only be used carefully, when the caller knows we want to * bypass a potential IO scheduler on the target device. */ -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue) +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; spin_lock(&hctx->lock); - list_add_tail(&rq->queuelist, &hctx->dispatch); + if (at_head) + list_add(&rq->queuelist, &hctx->dispatch); + else + list_add_tail(&rq->queuelist, &hctx->dispatch); spin_unlock(&hctx->lock); if (run_queue) @@ -1824,7 +1828,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if (bypass_insert) return BLK_STS_RESOURCE; - blk_mq_request_bypass_insert(rq, run_queue); + blk_mq_request_bypass_insert(rq, false, run_queue); return BLK_STS_OK; } @@ -1840,7 +1844,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true); if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) - blk_mq_request_bypass_insert(rq, true); + blk_mq_request_bypass_insert(rq, false, true); else if (ret != BLK_STS_OK) blk_mq_end_request(rq, ret); @@ -1874,7 +1878,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, if (ret != BLK_STS_OK) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) { - blk_mq_request_bypass_insert(rq, + blk_mq_request_bypass_insert(rq, false, list_empty(list)); break; } diff --git a/block/blk-mq.h b/block/blk-mq.h index eaaca8fc1c28..c0fa34378eb2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -66,7 +66,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, */ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head); -void blk_mq_request_bypass_insert(struct request *rq, bool run_queue); +void blk_mq_request_bypass_insert(struct request *rq, bool at_head, + bool run_queue); void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct list_head *list); diff --git a/block/genhd.c b/block/genhd.c index ff6268970ddc..9c2e13ce0d19 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -301,6 +301,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) } EXPORT_SYMBOL_GPL(disk_map_sector_rcu); +/** + * disk_has_partitions + * @disk: gendisk of interest + * + * Walk through the partition table and check if valid partition exists. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * True if the gendisk has at least one valid non-zero size partition. + * Otherwise false. 
+ */ +bool disk_has_partitions(struct gendisk *disk) +{ + struct disk_part_tbl *ptbl; + int i; + bool ret = false; + + rcu_read_lock(); + ptbl = rcu_dereference(disk->part_tbl); + + /* Iterate partitions skipping the whole device at index 0 */ + for (i = 1; i < ptbl->len; i++) { + if (rcu_dereference(ptbl->part[i])) { + ret = true; + break; + } + } + + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(disk_has_partitions); + /* * Can be deleted altogether. Later. * diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 650bade5ea5a..b61dbf4d8443 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -20,6 +20,7 @@ #include #include #include +#include struct blk_cmd_filter { unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; @@ -550,34 +551,6 @@ static inline int blk_send_start_stop(struct request_queue *q, return __blk_send_generic(q, bd_disk, GPCMD_START_STOP_UNIT, data); } -#ifdef CONFIG_COMPAT -struct compat_sg_io_hdr { - compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ - compat_int_t dxfer_direction; /* [i] data transfer direction */ - unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ - unsigned char mx_sb_len; /* [i] max length to write to sbp */ - unsigned short iovec_count; /* [i] 0 implies no scatter gather */ - compat_uint_t dxfer_len; /* [i] byte count of data transfer */ - compat_uint_t dxferp; /* [i], [*io] points to data transfer memory - or scatter gather list */ - compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ - compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ - compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ - compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */ - compat_int_t pack_id; /* [i->o] unused internally (normally) */ - compat_uptr_t usr_ptr; /* [i->o] unused internally */ - unsigned char status; /* [o] scsi status */ - unsigned char masked_status; /* [o] shifted, masked scsi status */ - unsigned char msg_status; /* [o] messaging level data (optional) */ - unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ - unsigned short host_status; /* [o] errors from host adapter */ - unsigned short driver_status; /* [o] errors from software driver */ - compat_int_t resid; /* [o] dxfer_len - actual_transferred */ - compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ - compat_uint_t info; /* [o] auxiliary information */ -}; -#endif - int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp) { #ifdef CONFIG_COMPAT diff --git a/crypto/Kconfig b/crypto/Kconfig index 5575d48473bd..cdb51d4272d0 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -511,10 +511,10 @@ config CRYPTO_ESSIV encryption. This driver implements a crypto API template that can be - instantiated either as a skcipher or as a aead (depending on the + instantiated either as an skcipher or as an AEAD (depending on the type of the first template argument), and which defers encryption and decryption requests to the encapsulated cipher after applying - ESSIV to the input IV. Note that in the aead case, it is assumed + ESSIV to the input IV. Note that in the AEAD case, it is assumed that the keys are presented in the same format used by the authenc template, and that the IV appears at the end of the authenticated associated data (AAD) region (which is how dm-crypt uses it.) 
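[Illustrative aside, not part of the patch: the ESSIV template described in the crypto/Kconfig help text above is instantiated by name through the regular crypto API. The sketch below assumes a kernel context; the template strings follow the "essiv(<inner cipher>,<shash>)" syntax that dm-crypt uses, and error handling is trimmed to the minimum.]

#include <linux/err.h>
#include <crypto/skcipher.h>
#include <crypto/aead.h>

static int essiv_alloc_demo(void)
{
	struct crypto_aead *aead;
	/* skcipher shape: the first template argument is a plain skcipher */
	struct crypto_skcipher *skc =
		crypto_alloc_skcipher("essiv(cbc(aes),sha256)", 0, 0);

	if (IS_ERR(skc))
		return PTR_ERR(skc);

	/* AEAD shape: the first template argument is an authenc() AEAD */
	aead = crypto_alloc_aead("essiv(authenc(hmac(sha256),cbc(aes)),sha256)",
				 0, 0);
	if (IS_ERR(aead)) {
		crypto_free_skcipher(skc);
		return PTR_ERR(aead);
	}

	crypto_free_aead(aead);
	crypto_free_skcipher(skc);
	return 0;
}

[In the AEAD shape the key is expected in the authenc() format and the IV is carried at the end of the AAD region, exactly as the help text notes.]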
diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 0dceaabc6321..3d8e53010cda 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -134,11 +134,13 @@ void af_alg_release_parent(struct sock *sk) sk = ask->parent; ask = alg_sk(sk); - lock_sock(sk); + local_bh_disable(); + bh_lock_sock(sk); ask->nokey_refcnt -= nokey; if (!last) last = !--ask->refcnt; - release_sock(sk); + bh_unlock_sock(sk); + local_bh_enable(); if (last) sock_put(sk); diff --git a/crypto/algapi.c b/crypto/algapi.c index b052f38edba6..5566650b2091 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -257,6 +257,7 @@ void crypto_alg_tested(const char *name, int err) struct crypto_alg *alg; struct crypto_alg *q; LIST_HEAD(list); + bool best; down_write(&crypto_alg_sem); list_for_each_entry(q, &crypto_alg_list, cra_list) { @@ -280,6 +281,21 @@ void crypto_alg_tested(const char *name, int err) alg->cra_flags |= CRYPTO_ALG_TESTED; + /* Only satisfy larval waiters if we are the best. */ + best = true; + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (crypto_is_moribund(q) || !crypto_is_larval(q)) + continue; + + if (strcmp(alg->cra_name, q->cra_name)) + continue; + + if (q->cra_priority > alg->cra_priority) { + best = false; + break; + } + } + list_for_each_entry(q, &crypto_alg_list, cra_list) { if (q == alg) continue; @@ -303,10 +319,12 @@ void crypto_alg_tested(const char *name, int err) continue; if ((q->cra_flags ^ alg->cra_flags) & larval->mask) continue; - if (!crypto_mod_get(alg)) - continue; - larval->adult = alg; + if (best && crypto_mod_get(alg)) + larval->adult = alg; + else + larval->adult = ERR_PTR(-EAGAIN); + continue; } @@ -669,11 +687,9 @@ EXPORT_SYMBOL_GPL(crypto_grab_spawn); void crypto_drop_spawn(struct crypto_spawn *spawn) { - if (!spawn->alg) - return; - down_write(&crypto_alg_sem); - list_del(&spawn->list); + if (spawn->alg) + list_del(&spawn->list); up_write(&crypto_alg_sem); } EXPORT_SYMBOL_GPL(crypto_drop_spawn); @@ -681,22 +697,16 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn); static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn) { struct crypto_alg *alg; - struct crypto_alg *alg2; down_read(&crypto_alg_sem); alg = spawn->alg; - alg2 = alg; - if (alg2) - alg2 = crypto_mod_get(alg2); - up_read(&crypto_alg_sem); - - if (!alg2) { - if (alg) - crypto_shoot_alg(alg); - return ERR_PTR(-EAGAIN); + if (alg && !crypto_mod_get(alg)) { + alg->cra_flags |= CRYPTO_ALG_DYING; + alg = NULL; } + up_read(&crypto_alg_sem); - return alg; + return alg ?: ERR_PTR(-EAGAIN); } struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, diff --git a/crypto/api.c b/crypto/api.c index 55bca28df92d..c00af5ad1b16 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -97,7 +97,7 @@ static void crypto_larval_destroy(struct crypto_alg *alg) struct crypto_larval *larval = (void *)alg; BUG_ON(!crypto_is_larval(alg)); - if (larval->adult) + if (!IS_ERR_OR_NULL(larval->adult)) crypto_mod_put(larval->adult); kfree(larval); } @@ -178,6 +178,8 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) alg = ERR_PTR(-ETIMEDOUT); else if (!alg) alg = ERR_PTR(-ENOENT); + else if (IS_ERR(alg)) + ; else if (crypto_is_test_larval(larval) && !(alg->cra_flags & CRYPTO_ALG_TESTED)) alg = ERR_PTR(-EAGAIN); @@ -344,13 +346,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) return len; } -void crypto_shoot_alg(struct crypto_alg *alg) +static void crypto_shoot_alg(struct crypto_alg *alg) { down_write(&crypto_alg_sem); alg->cra_flags |= CRYPTO_ALG_DYING; 
up_write(&crypto_alg_sem); } -EXPORT_SYMBOL_GPL(crypto_shoot_alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/hash_info.c b/crypto/hash_info.c index c754cb75dd1a..a49ff96bde77 100644 --- a/crypto/hash_info.c +++ b/crypto/hash_info.c @@ -26,7 +26,7 @@ const char *const hash_algo_name[HASH_ALGO__LAST] = { [HASH_ALGO_TGR_128] = "tgr128", [HASH_ALGO_TGR_160] = "tgr160", [HASH_ALGO_TGR_192] = "tgr192", - [HASH_ALGO_SM3_256] = "sm3-256", + [HASH_ALGO_SM3_256] = "sm3", [HASH_ALGO_STREEBOG_256] = "streebog256", [HASH_ALGO_STREEBOG_512] = "streebog512", }; diff --git a/crypto/internal.h b/crypto/internal.h index 93df7bec844a..e506a57e2243 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -68,7 +68,6 @@ void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, struct crypto_alg *nalg); void crypto_remove_final(struct list_head *list); -void crypto_shoot_alg(struct crypto_alg *alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm(struct crypto_alg *alg, diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 543792e0ebf0..a4f3b3f342c8 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -24,6 +24,8 @@ static struct kset *pcrypt_kset; struct pcrypt_instance_ctx { struct crypto_aead_spawn spawn; + struct padata_shell *psenc; + struct padata_shell *psdec; atomic_t tfm_count; }; @@ -32,6 +34,12 @@ struct pcrypt_aead_ctx { unsigned int cb_cpu; }; +static inline struct pcrypt_instance_ctx *pcrypt_tfm_ictx( + struct crypto_aead *tfm) +{ + return aead_instance_ctx(aead_alg_instance(tfm)); +} + static int pcrypt_aead_setkey(struct crypto_aead *parent, const u8 *key, unsigned int keylen) { @@ -63,7 +71,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err) struct padata_priv *padata = pcrypt_request_padata(preq); padata->info = err; - req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; padata_do_serial(padata); } @@ -90,6 +97,9 @@ static int pcrypt_aead_encrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead); u32 flags = aead_request_flags(req); + struct pcrypt_instance_ctx *ictx; + + ictx = pcrypt_tfm_ictx(aead); memset(padata, 0, sizeof(struct padata_priv)); @@ -103,7 +113,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req) req->cryptlen, req->iv); aead_request_set_ad(creq, req->assoclen); - err = padata_do_parallel(pencrypt, padata, &ctx->cb_cpu); + err = padata_do_parallel(ictx->psenc, padata, &ctx->cb_cpu); if (!err) return -EINPROGRESS; @@ -132,6 +142,9 @@ static int pcrypt_aead_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(aead); u32 flags = aead_request_flags(req); + struct pcrypt_instance_ctx *ictx; + + ictx = pcrypt_tfm_ictx(aead); memset(padata, 0, sizeof(struct padata_priv)); @@ -145,7 +158,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req) req->cryptlen, req->iv); aead_request_set_ad(creq, req->assoclen); - err = padata_do_parallel(pdecrypt, padata, &ctx->cb_cpu); + err = padata_do_parallel(ictx->psdec, padata, &ctx->cb_cpu); if (!err) return -EINPROGRESS; @@ -192,6 +205,8 @@ static void pcrypt_free(struct aead_instance *inst) struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst); crypto_drop_aead(&ctx->spawn); + padata_free_shell(ctx->psdec); + padata_free_shell(ctx->psenc); kfree(inst); } @@ -233,12 
+248,22 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, if (!inst) return -ENOMEM; + err = -ENOMEM; + ctx = aead_instance_ctx(inst); + ctx->psenc = padata_alloc_shell(pencrypt); + if (!ctx->psenc) + goto out_free_inst; + + ctx->psdec = padata_alloc_shell(pdecrypt); + if (!ctx->psdec) + goto out_free_psenc; + crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst)); err = crypto_grab_aead(&ctx->spawn, name, 0, 0); if (err) - goto out_free_inst; + goto out_free_psdec; alg = crypto_spawn_aead_alg(&ctx->spawn); err = pcrypt_init_instance(aead_crypto_instance(inst), &alg->base); @@ -271,6 +296,10 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb, out_drop_aead: crypto_drop_aead(&ctx->spawn); +out_free_psdec: + padata_free_shell(ctx->psdec); +out_free_psenc: + padata_free_shell(ctx->psenc); out_free_inst: kfree(inst); goto out; @@ -362,11 +391,12 @@ static int __init pcrypt_init(void) static void __exit pcrypt_exit(void) { + crypto_unregister_template(&pcrypt_tmpl); + pcrypt_fini_padata(pencrypt); pcrypt_fini_padata(pdecrypt); kset_unregister(pcrypt_kset); - crypto_unregister_template(&pcrypt_tmpl); } subsys_initcall(pcrypt_init); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 82513b6b0abd..2c96963b2e51 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2102,6 +2102,7 @@ static void generate_random_aead_testvec(struct aead_request *req, * If the key or authentication tag size couldn't be set, no need to * continue to encrypt. */ + vec->crypt_error = 0; if (vec->setkey_error || vec->setauthsize_error) goto done; @@ -2245,10 +2246,12 @@ static int test_aead_vs_generic_impl(const char *driver, req, tsgls); if (err) goto out; - err = test_aead_vec_cfg(driver, DECRYPT, &vec, vec_name, cfg, - req, tsgls); - if (err) - goto out; + if (vec.crypt_error == 0) { + err = test_aead_vec_cfg(driver, DECRYPT, &vec, vec_name, + cfg, req, tsgls); + if (err) + goto out; + } cond_resched(); } err = 0; @@ -2678,6 +2681,15 @@ static void generate_random_cipher_testvec(struct skcipher_request *req, skcipher_request_set_callback(req, 0, crypto_req_done, &wait); skcipher_request_set_crypt(req, &src, &dst, vec->len, iv); vec->crypt_error = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); + if (vec->crypt_error != 0) { + /* + * The only acceptable error here is for an invalid length, so + * skcipher decryption should fail with the same error too. + * We'll test for this. But to keep the API usage well-defined, + * explicitly initialize the ciphertext buffer too. 
+ */ + memset((u8 *)vec->ctext, 0, vec->len); + } done: snprintf(name, max_namelen, "\"random: len=%u klen=%u\"", vec->len, vec->klen); diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index b5516b04ffc0..6e9ec6e3fe47 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -55,12 +55,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -88,6 +90,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; @@ -126,12 +136,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index bcf8f7501db7..a74c1a0e892d 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -101,6 +101,8 @@ acpi_status acpi_hw_enable_all_runtime_gpes(void); acpi_status acpi_hw_enable_all_wakeup_gpes(void); +u8 acpi_hw_check_all_gpes(void); + acpi_status acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, struct acpi_gpe_block_info *gpe_block, diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index faa38a22263a..ae713d746c8b 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -243,7 +243,7 @@ acpi_ds_create_buffer_field(union acpi_parse_object *op, * FUNCTION: acpi_ds_get_field_names * * PARAMETERS: info - create_field info structure - * ` walk_state - Current method state + * walk_state - Current method state * arg - First parser arg for the field name list * * RETURN: Status diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index c88fd31208a5..4bcf15bf03de 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -410,6 +410,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op, walk_state)); + /* + * Disassembler: handle create field operators here. + * + * create_buffer_field is a deferred op that is typically processed in load + * pass 2. However, disassembly of control method contents walks the parse + * tree with ACPI_PARSE_LOAD_PASS1, and AML_CREATE operators are processed + * in a later walk. This is a problem when there is a control method that + * has the same name as the AML_CREATE object. In this case, any use of the + * name segment will be detected as a method call rather than a reference + * to a buffer field.
+ * + * This earlier creation during disassembly solves this issue by inserting + * the named object in the ACPI namespace so that references to this name + * would be a name string rather than a method call. + */ + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) && + (walk_state->op_info->flags & AML_CREATE)) { + status = acpi_ds_create_buffer_field(op, walk_state); + return_ACPI_STATUS(status); + } + /* We are only interested in opcodes that have an associated name */ if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) { diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c index 9e2f5a05c066..bad2257356fe 100644 --- a/drivers/acpi/acpica/evevent.c +++ b/drivers/acpi/acpica/evevent.c @@ -265,4 +265,49 @@ static u32 acpi_ev_fixed_event_dispatch(u32 event) handler) (acpi_gbl_fixed_event_handlers[event].context)); } +/******************************************************************************* + * + * FUNCTION: acpi_any_fixed_event_status_set + * + * PARAMETERS: None + * + * RETURN: TRUE or FALSE + * + * DESCRIPTION: Checks the PM status register for active fixed events + * + ******************************************************************************/ + +u32 acpi_any_fixed_event_status_set(void) +{ + acpi_status status; + u32 in_status; + u32 in_enable; + u32 i; + + status = acpi_hw_register_read(ACPI_REGISTER_PM1_ENABLE, &in_enable); + if (ACPI_FAILURE(status)) { + return (FALSE); + } + + status = acpi_hw_register_read(ACPI_REGISTER_PM1_STATUS, &in_status); + if (ACPI_FAILURE(status)) { + return (FALSE); + } + + /* + * Check for all possible Fixed Events and dispatch those that are active + */ + for (i = 0; i < ACPI_NUM_FIXED_EVENTS; i++) { + + /* Both the status and enable bits must be on for this event */ + + if ((in_status & acpi_gbl_fixed_event_info[i].status_bit_mask) && + (in_enable & acpi_gbl_fixed_event_info[i].enable_bit_mask)) { + return (TRUE); + } + } + + return (FALSE); +} + #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c index 04a40d563dd6..84b0b410310e 100644 --- a/drivers/acpi/acpica/evxfgpe.c +++ b/drivers/acpi/acpica/evxfgpe.c @@ -795,6 +795,38 @@ acpi_status acpi_enable_all_wakeup_gpes(void) ACPI_EXPORT_SYMBOL(acpi_enable_all_wakeup_gpes) +/****************************************************************************** + * + * FUNCTION: acpi_any_gpe_status_set + * + * PARAMETERS: None + * + * RETURN: Whether or not the status bit is set for any GPE + * + * DESCRIPTION: Check the status bits of all enabled GPEs and return TRUE if any + * of them is set or FALSE otherwise. 
+ * + ******************************************************************************/ +u32 acpi_any_gpe_status_set(void) +{ + acpi_status status; + u8 ret; + + ACPI_FUNCTION_TRACE(acpi_any_gpe_status_set); + + status = acpi_ut_acquire_mutex(ACPI_MTX_EVENTS); + if (ACPI_FAILURE(status)) { + return (FALSE); + } + + ret = acpi_hw_check_all_gpes(); + (void)acpi_ut_release_mutex(ACPI_MTX_EVENTS); + + return (ret); +} + +ACPI_EXPORT_SYMBOL(acpi_any_gpe_status_set) + /******************************************************************************* * * FUNCTION: acpi_install_gpe_block diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index 565bd3f29f31..b1d7d5f92495 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -444,6 +444,53 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, return (AE_OK); } +/****************************************************************************** + * + * FUNCTION: acpi_hw_get_gpe_block_status + * + * PARAMETERS: gpe_xrupt_info - GPE Interrupt info + * gpe_block - Gpe Block info + * + * RETURN: Success + * + * DESCRIPTION: Produce a combined GPE status bits mask for the given block. + * + ******************************************************************************/ + +static acpi_status +acpi_hw_get_gpe_block_status(struct acpi_gpe_xrupt_info *gpe_xrupt_info, + struct acpi_gpe_block_info *gpe_block, + void *ret_ptr) +{ + struct acpi_gpe_register_info *gpe_register_info; + u64 in_enable, in_status; + acpi_status status; + u8 *ret = ret_ptr; + u32 i; + + /* Examine each GPE Register within the block */ + + for (i = 0; i < gpe_block->register_count; i++) { + gpe_register_info = &gpe_block->register_info[i]; + + status = acpi_hw_read(&in_enable, + &gpe_register_info->enable_address); + if (ACPI_FAILURE(status)) { + continue; + } + + status = acpi_hw_read(&in_status, + &gpe_register_info->status_address); + if (ACPI_FAILURE(status)) { + continue; + } + + *ret |= in_enable & in_status; + } + + return (AE_OK); +} + /****************************************************************************** * * FUNCTION: acpi_hw_disable_all_gpes @@ -510,4 +557,28 @@ acpi_status acpi_hw_enable_all_wakeup_gpes(void) return_ACPI_STATUS(status); } +/****************************************************************************** + * + * FUNCTION: acpi_hw_check_all_gpes + * + * PARAMETERS: None + * + * RETURN: Combined status of all GPEs + * + * DESCRIPTION: Check all enabled GPEs in all GPE blocks and return TRUE if the + * status bit is set for at least one of them or FALSE otherwise. + * + ******************************************************************************/ + +u8 acpi_hw_check_all_gpes(void) +{ + u8 ret = 0; + + ACPI_FUNCTION_TRACE(acpi_hw_check_all_gpes); + + (void)acpi_ev_walk_gpe_list(acpi_hw_get_gpe_block_status, &ret); + + return (ret != 0); +} + #endif /* !ACPI_REDUCED_HARDWARE */ diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 8906c80175e6..2122b83821ba 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes) * New allocation must be visible in all pgd before it can be found by * an NMI allocating from the pool.
*/ - vmalloc_sync_all(); + vmalloc_sync_mappings(); rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); if (rc) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 8f0e0c8d8c3d..15cc7d5a6185 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -38,6 +38,8 @@ #define PREFIX "ACPI: " #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF +#define ACPI_BATTERY_CAPACITY_VALID(capacity) \ + ((capacity) != 0 && (capacity) != ACPI_BATTERY_VALUE_UNKNOWN) #define ACPI_BATTERY_DEVICE_NAME "Battery" @@ -192,7 +194,8 @@ static int acpi_battery_is_charged(struct acpi_battery *battery) static bool acpi_battery_is_degraded(struct acpi_battery *battery) { - return battery->full_charge_capacity && battery->design_capacity && + return ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) && + ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity) && battery->full_charge_capacity < battery->design_capacity; } @@ -214,7 +217,7 @@ static int acpi_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { - int ret = 0; + int full_capacity = ACPI_BATTERY_VALUE_UNKNOWN, ret = 0; struct acpi_battery *battery = to_acpi_battery(psy); if (acpi_battery_present(battery)) { @@ -263,14 +266,14 @@ static int acpi_battery_get_property(struct power_supply *psy, break; case POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN: case POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN: - if (battery->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + if (!ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) ret = -ENODEV; else val->intval = battery->design_capacity * 1000; break; case POWER_SUPPLY_PROP_CHARGE_FULL: case POWER_SUPPLY_PROP_ENERGY_FULL: - if (battery->full_charge_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity)) ret = -ENODEV; else val->intval = battery->full_charge_capacity * 1000; @@ -283,11 +286,17 @@ static int acpi_battery_get_property(struct power_supply *psy, val->intval = battery->capacity_now * 1000; break; case POWER_SUPPLY_PROP_CAPACITY: - if (battery->capacity_now && battery->full_charge_capacity) - val->intval = battery->capacity_now * 100/ - battery->full_charge_capacity; + if (ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity)) + full_capacity = battery->full_charge_capacity; + else if (ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) + full_capacity = battery->design_capacity; + + if (battery->capacity_now == ACPI_BATTERY_VALUE_UNKNOWN || + full_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + ret = -ENODEV; else - val->intval = 0; + val->intval = battery->capacity_now * 100/ + full_capacity; break; case POWER_SUPPLY_PROP_CAPACITY_LEVEL: if (battery->state & ACPI_BATTERY_STATE_CRITICAL) @@ -333,6 +342,20 @@ static enum power_supply_property charge_battery_props[] = { POWER_SUPPLY_PROP_SERIAL_NUMBER, }; +static enum power_supply_property charge_battery_full_cap_broken_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_CYCLE_COUNT, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CURRENT_NOW, + POWER_SUPPLY_PROP_CHARGE_NOW, + POWER_SUPPLY_PROP_MODEL_NAME, + POWER_SUPPLY_PROP_MANUFACTURER, + POWER_SUPPLY_PROP_SERIAL_NUMBER, +}; + static enum power_supply_property energy_battery_props[] = { POWER_SUPPLY_PROP_STATUS, POWER_SUPPLY_PROP_PRESENT, @@ -794,20 +817,34 @@ static void __exit battery_hook_exit(void) static int sysfs_add_battery(struct acpi_battery *battery) 
{ struct power_supply_config psy_cfg = { .drv_data = battery, }; + bool full_cap_broken = false; + + if (!ACPI_BATTERY_CAPACITY_VALID(battery->full_charge_capacity) && + !ACPI_BATTERY_CAPACITY_VALID(battery->design_capacity)) + full_cap_broken = true; if (battery->power_unit == ACPI_BATTERY_POWER_UNIT_MA) { - battery->bat_desc.properties = charge_battery_props; - battery->bat_desc.num_properties = - ARRAY_SIZE(charge_battery_props); - } else if (battery->full_charge_capacity == 0) { - battery->bat_desc.properties = - energy_battery_full_cap_broken_props; - battery->bat_desc.num_properties = - ARRAY_SIZE(energy_battery_full_cap_broken_props); + if (full_cap_broken) { + battery->bat_desc.properties = + charge_battery_full_cap_broken_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(charge_battery_full_cap_broken_props); + } else { + battery->bat_desc.properties = charge_battery_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(charge_battery_props); + } } else { - battery->bat_desc.properties = energy_battery_props; - battery->bat_desc.num_properties = - ARRAY_SIZE(energy_battery_props); + if (full_cap_broken) { + battery->bat_desc.properties = + energy_battery_full_cap_broken_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(energy_battery_full_cap_broken_props); + } else { + battery->bat_desc.properties = energy_battery_props; + battery->bat_desc.num_properties = + ARRAY_SIZE(energy_battery_props); + } } battery->bat_desc.name = acpi_device_bid(battery->device); diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index b758b45737f5..f6925f16c4a2 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -122,6 +122,17 @@ static const struct dmi_system_id dmi_lid_quirks[] = { }, .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, }, + { + /* + * Razer Blade Stealth 13 late 2019, notification of the LID device + * only happens on close, not on open and _LID always returns closed. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Razer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Razer Blade Stealth 13 Late 2019"), + }, + .driver_data = (void *)(long)ACPI_BUTTON_LID_INIT_OPEN, + }, {} }; diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index d05be13c1022..bd74c7836675 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -179,6 +179,7 @@ EXPORT_SYMBOL(first_ec); static struct acpi_ec *boot_ec; static bool boot_ec_is_ecdt = false; +static struct workqueue_struct *ec_wq; static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ @@ -469,7 +470,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec) ec_dbg_evt("Command(%s) submitted/blocked", acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY)); ec->nr_pending_queries++; - schedule_work(&ec->work); + queue_work(ec_wq, &ec->work); } } @@ -535,7 +536,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec) #ifdef CONFIG_PM_SLEEP static void __acpi_ec_flush_work(void) { - flush_scheduled_work(); /* flush ec->work */ + drain_workqueue(ec_wq); /* flush ec->work */ flush_workqueue(ec_query_wq); /* flush queries */ } @@ -556,8 +557,8 @@ static void acpi_ec_disable_event(struct acpi_ec *ec) void acpi_ec_flush_work(void) { - /* Without ec_query_wq there is nothing to flush. */ - if (!ec_query_wq) + /* Without ec_wq there is nothing to flush. 
*/
+	if (!ec_wq)
 		return;
 
 	__acpi_ec_flush_work();
@@ -2115,25 +2116,33 @@ static struct acpi_driver acpi_ec_driver = {
 		.drv.pm = &acpi_ec_pm,
 };
 
-static inline int acpi_ec_query_init(void)
+static void acpi_ec_destroy_workqueues(void)
 {
-	if (!ec_query_wq) {
-		ec_query_wq = alloc_workqueue("kec_query", 0,
-					      ec_max_queries);
-		if (!ec_query_wq)
-			return -ENODEV;
+	if (ec_wq) {
+		destroy_workqueue(ec_wq);
+		ec_wq = NULL;
 	}
-	return 0;
-}
-
-static inline void acpi_ec_query_exit(void)
-{
 	if (ec_query_wq) {
 		destroy_workqueue(ec_query_wq);
 		ec_query_wq = NULL;
 	}
 }
 
+static int acpi_ec_init_workqueues(void)
+{
+	if (!ec_wq)
+		ec_wq = alloc_ordered_workqueue("kec", 0);
+
+	if (!ec_query_wq)
+		ec_query_wq = alloc_workqueue("kec_query", 0, ec_max_queries);
+
+	if (!ec_wq || !ec_query_wq) {
+		acpi_ec_destroy_workqueues();
+		return -ENODEV;
+	}
+	return 0;
+}
+
 static const struct dmi_system_id acpi_ec_no_wakeup[] = {
 	{
 		.ident = "Thinkpad X1 Carbon 6th",
@@ -2164,8 +2173,7 @@ int __init acpi_ec_init(void)
 	int result;
 	int ecdt_fail, dsdt_fail;
 
-	/* register workqueue for _Qxx evaluations */
-	result = acpi_ec_query_init();
+	result = acpi_ec_init_workqueues();
 	if (result)
 		return result;
 
@@ -2196,6 +2204,6 @@ static void __exit acpi_ec_exit(void)
 {
 
 	acpi_bus_unregister_driver(&acpi_ec_driver);
-	acpi_ec_query_exit();
+	acpi_ec_destroy_workqueues();
 }
 #endif				/* 0 */
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 6747a279621b..ce59a3f32eac 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -987,21 +987,41 @@ static void acpi_s2idle_sync(void)
 	acpi_os_wait_events_complete(); /* synchronize Notify handling */
 }
 
-static void acpi_s2idle_wake(void)
+static bool acpi_s2idle_wake(void)
 {
-	/*
-	 * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has
-	 * not triggered while suspended, so bail out.
-	 */
-	if (!acpi_sci_irq_valid() ||
-	    irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
-		return;
+	if (!acpi_sci_irq_valid())
+		return pm_wakeup_pending();
+
+	while (pm_wakeup_pending()) {
+		/*
+		 * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the
+		 * SCI has not triggered while suspended, so bail out (the
+		 * wakeup is pending anyway and the SCI is not the source of
+		 * it).
+		 */
+		if (irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
+			return true;
+
+		/*
+		 * If the status bit of any enabled fixed event is set, the
+		 * wakeup is regarded as valid.
+		 */
+		if (acpi_any_fixed_event_status_set())
+			return true;
+
+		/*
+		 * If there are no EC events to process and at least one of the
+		 * other enabled GPEs is active, the wakeup is regarded as a
+		 * genuine one.
+		 *
+		 * Note that the checks below must be carried out in this order
+		 * to avoid returning prematurely due to a change of the EC GPE
+		 * status bit from unset to set between the checks with the
+		 * status bits of all the other GPEs unset.
+		 */
+		if (acpi_any_gpe_status_set() && !acpi_ec_dispatch_gpe())
+			return true;
 
-	/*
-	 * If there are EC events to process, the wakeup may be a spurious one
-	 * coming from the EC.
-	 */
-	if (acpi_ec_dispatch_gpe()) {
 		/*
 		 * Cancel the wakeup and process all pending events in case
 		 * there are any wakeup ones in there.
@@ -1014,8 +1034,19 @@ static void acpi_s2idle_wake(void)
 		 */
 		acpi_s2idle_sync();
 
+		/*
+		 * The SCI is in the "suspended" state now and it cannot produce
+		 * new wakeup events till the rearming below, so if any of them
+		 * are pending here, they must be resulting from the processing
+		 * of EC events above or coming from somewhere else.
+		 */
+		if (pm_wakeup_pending())
+			return true;
+
 		rearm_wake_irq(acpi_sci_irq);
 	}
+
+	return false;
 }
 
 static void acpi_s2idle_restore_early(void)
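The reworked acpi_s2idle_wake() above validates a wakeup in a fixed order: armed SCI, fixed events, then non-EC GPEs, looping for as long as spurious EC events keep a wakeup pending. The sketch below mirrors only that control flow; the predicate functions are stand-ins with hard-coded return values modelling one spurious EC wakeup, not the real ACPI helpers:

    #include <stdbool.h>
    #include <stdio.h>

    static bool sci_wakeup_armed(void)    { return false; } /* SCI did fire */
    static bool any_fixed_event_set(void) { return false; }
    static bool any_gpe_status_set(void)  { return true;  }
    static bool ec_dispatch_gpe(void)     { return true;  } /* it was an EC event */

    /* one pending wakeup, consumed after the first pass */
    static bool wakeup_pending(void)
    {
            static int pending = 1;
            return pending-- > 0;
    }

    static bool s2idle_wake(void)
    {
            while (wakeup_pending()) {
                    if (sci_wakeup_armed())
                            return true;    /* SCI not involved: genuine wakeup */
                    if (any_fixed_event_set())
                            return true;    /* fixed event pending: genuine wakeup */
                    /*
                     * The GPE check must precede the EC dispatch so an EC GPE
                     * firing between the two checks cannot be misread as a
                     * non-EC source.
                     */
                    if (any_gpe_status_set() && !ec_dispatch_gpe())
                            return true;    /* some non-EC GPE is active */
                    /* spurious EC wakeup was processed; re-test and loop */
            }
            return false;   /* nothing left to wake up for */
    }

    int main(void)
    {
            printf("wake? %d\n", s2idle_wake());    /* prints 0 here */
            return 0;
    }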
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 31014c7d3793..e63fd7bfd3a5 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -336,6 +336,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"),
 		},
 	},
+
+	/*
+	 * Desktops which falsely report a backlight and which our heuristics
+	 * for this do not catch.
+	 */
 	{
 	 .callback = video_detect_force_none,
 	 .ident = "Dell OptiPlex 9020M",
 	 .matches = {
 		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -344,6 +349,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"),
 		},
 	},
+	{
+	 .callback = video_detect_force_none,
+	 .ident = "MSI MS-7721",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "MSI"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"),
+		},
+	},
 	{ },
 };
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index b2dad43dbf82..59b217ffeb59 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5199,10 +5199,11 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 
 static int binder_open(struct inode *nodp, struct file *filp)
 {
-	struct binder_proc *proc;
+	struct binder_proc *proc, *itr;
 	struct binder_device *binder_dev;
 	struct binderfs_info *info;
 	struct dentry *binder_binderfs_dir_entry_proc = NULL;
+	bool existing_pid = false;
 
 	binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__,
 		     current->group_leader->pid, current->pid);
@@ -5225,6 +5226,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
 		binder_dev = container_of(filp->private_data,
 					  struct binder_device, miscdev);
 	}
+	refcount_inc(&binder_dev->ref);
 	proc->context = &binder_dev->context;
 	binder_alloc_init(&proc->alloc);
 
@@ -5235,19 +5237,24 @@ static int binder_open(struct inode *nodp, struct file *filp)
 	filp->private_data = proc;
 
 	mutex_lock(&binder_procs_lock);
+	hlist_for_each_entry(itr, &binder_procs, proc_node) {
+		if (itr->pid == proc->pid) {
+			existing_pid = true;
+			break;
+		}
+	}
 	hlist_add_head(&proc->proc_node, &binder_procs);
 	mutex_unlock(&binder_procs_lock);
 
-	if (binder_debugfs_dir_entry_proc) {
+	if (binder_debugfs_dir_entry_proc && !existing_pid) {
 		char strbuf[11];
 
 		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
 		/*
-		 * proc debug entries are shared between contexts, so
-		 * this will fail if the process tries to open the driver
-		 * again with a different context. The priting code will
-		 * anyway print all contexts that a given PID has, so this
-		 * is not a problem.
+		 * proc debug entries are shared between contexts.
+		 * Only create the entry for the first PID to avoid debugfs
+		 * log spamming. The printing code will anyway print all
+		 * contexts for a given PID, so this is not a problem.
 		 */
 		proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
 			binder_debugfs_dir_entry_proc,
 			(void *)(unsigned long)proc->pid,
 			&proc_fops);
 	}
 
-	if (binder_binderfs_dir_entry_proc) {
+	if (binder_binderfs_dir_entry_proc && !existing_pid) {
 		char strbuf[11];
 		struct dentry *binderfs_entry;
 
 		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
 		/*
 		 * Similar to debugfs, the process specific log file is shared
-		 * between contexts. If the file has already been created for a
-		 * process, the following binderfs_create_file() call will
-		 * fail with error code EEXIST if another context of the same
-		 * process invoked binder_open(). This is ok since same as
-		 * debugfs, the log file will contain information on all
-		 * contexts of a given PID.
+		 * between contexts. Only create the file for the first PID.
+		 * This is OK because, as with debugfs, the log file will
+		 * contain information on all contexts of a given PID.
 		 */
 		binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc,
 			strbuf, &proc_fops, (void *)(unsigned long)proc->pid);
@@ -5277,10 +5281,8 @@ static int binder_open(struct inode *nodp, struct file *filp)
 			int error;
 
 			error = PTR_ERR(binderfs_entry);
-			if (error != -EEXIST) {
-				pr_warn("Unable to create file %s in binderfs (error %d)\n",
-					strbuf, error);
-			}
+			pr_warn("Unable to create file %s in binderfs (error %d)\n",
+				strbuf, error);
 		}
 	}
 
@@ -5402,6 +5404,7 @@ static int binder_node_release(struct binder_node *node, int refs)
 static void binder_deferred_release(struct binder_proc *proc)
 {
 	struct binder_context *context = proc->context;
+	struct binder_device *device;
 	struct rb_node *n;
 	int threads, nodes, incoming_refs, outgoing_refs, active_transactions;
 
@@ -5418,6 +5421,12 @@ static void binder_deferred_release(struct binder_proc *proc)
 		context->binder_context_mgr_node = NULL;
 	}
 	mutex_unlock(&context->context_mgr_node_lock);
+	device = container_of(proc->context, struct binder_device, context);
+	if (refcount_dec_and_test(&device->ref)) {
+		kfree(context->name);
+		kfree(device);
+	}
+	proc->context = NULL;
 	binder_inner_proc_lock(proc);
 	/*
 	 * Make sure proc stays alive after we
@@ -6074,6 +6083,7 @@ static int __init init_binder_device(const char *name)
 	binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
 	binder_device->miscdev.name = name;
 
+	refcount_set(&binder_device->ref, 1);
 	binder_device->context.binder_context_mgr_uid = INVALID_UID;
 	binder_device->context.name = name;
 	mutex_init(&binder_device->context.context_mgr_node_lock);
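The binder hunks above turn the binder_device into shared, reference-counted state: the creator holds the initial reference, every binder_open() takes another via refcount_inc(), and whichever of binder_deferred_release() or binderfs_evict_inode() brings the count to zero frees the context name and the device. A standalone sketch of this last-owner-frees pattern, using C11 atomics in place of the kernel's refcount_t and invented names throughout:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct device_sketch {
            atomic_int ref;
            char *name;
    };

    static struct device_sketch *device_create(const char *name)
    {
            struct device_sketch *d = malloc(sizeof(*d));

            if (!d)
                    return NULL;
            d->name = strdup(name);
            atomic_init(&d->ref, 1);        /* creator's reference */
            return d;
    }

    static void device_get(struct device_sketch *d)
    {
            atomic_fetch_add(&d->ref, 1);   /* like refcount_inc() */
    }

    static void device_put(struct device_sketch *d)
    {
            /* like refcount_dec_and_test(): the last owner frees */
            if (atomic_fetch_sub(&d->ref, 1) == 1) {
                    printf("freeing %s\n", d->name);
                    free(d->name);
                    free(d);
            }
    }

    int main(void)
    {
            struct device_sketch *d = device_create("binder-sketch");

            device_get(d);  /* an open file takes a reference */
            device_put(d);  /* inode eviction drops the creator's reference */
            device_put(d);  /* the last close frees the device */
            return 0;
    }

The point of the pattern is that neither owner needs to know whether the other is still alive; the use-after-free the patch fixes came from freeing unconditionally in one of the two paths.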
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index ae991097d14d..283d3cb9c16e 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -8,6 +8,7 @@
 #include <linux/list.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
+#include <linux/refcount.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/uidgid.h>
@@ -33,6 +34,7 @@ struct binder_device {
 	struct miscdevice miscdev;
 	struct binder_context context;
 	struct inode *binderfs_inode;
+	refcount_t ref;
 };
 
 /**
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index e2580e5316a2..f303106b3362 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -154,6 +154,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
 	if (!name)
 		goto err;
 
+	refcount_set(&device->ref, 1);
 	device->binderfs_inode = inode;
 	device->context.binder_context_mgr_uid = INVALID_UID;
 	device->context.name = name;
@@ -257,8 +258,10 @@ static void binderfs_evict_inode(struct inode *inode)
 	ida_free(&binderfs_minors, device->miscdev.minor);
 	mutex_unlock(&binderfs_minors_mutex);
 
-	kfree(device->context.name);
-	kfree(device);
+	if (refcount_dec_and_test(&device->ref)) {
+		kfree(device->context.name);
+		kfree(device);
+	}
 }
 
 /**
@@ -445,6 +448,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
 	inode->i_uid = info->root_uid;
 	inode->i_gid = info->root_gid;
 
+	refcount_set(&device->ref, 1);
 	device->binderfs_inode = inode;
 	device->miscdev.minor = minor;
 
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 4bfd1b14b390..11ea1aff40db 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -81,6 +81,7 @@ enum board_ids {
 
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
 static void
ahci_remove_one(struct pci_dev *dev); +static void ahci_shutdown_one(struct pci_dev *dev); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -606,6 +607,7 @@ static struct pci_driver ahci_pci_driver = { .id_table = ahci_pci_tbl, .probe = ahci_init_one, .remove = ahci_remove_one, + .shutdown = ahci_shutdown_one, .driver = { .pm = &ahci_pci_pm_ops, }, @@ -1877,6 +1879,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void ahci_shutdown_one(struct pci_dev *pdev) +{ + ata_pci_shutdown_one(pdev); +} + static void ahci_remove_one(struct pci_dev *pdev) { pm_runtime_get_noresume(&pdev->dev); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6f4ab5c5b52d..42c8728f6117 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6767,6 +6767,26 @@ void ata_pci_remove_one(struct pci_dev *pdev) ata_host_detach(host); } +void ata_pci_shutdown_one(struct pci_dev *pdev) +{ + struct ata_host *host = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ap->pflags |= ATA_PFLAG_FROZEN; + + /* Disable port interrupts */ + if (ap->ops->freeze) + ap->ops->freeze(ap); + + /* Stop the port DMA engines */ + if (ap->ops->port_stop) + ap->ops->port_stop(ap); + } +} + /* move to PCI subsystem */ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { @@ -7387,6 +7407,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode); #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); #ifdef CONFIG_PM EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f1a500205313..8fbd36eb8941 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1414,12 +1414,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<vci >= 0) && (vcc->vci < 1<dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1480,9 +1482,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); diff --git a/drivers/base/component.c b/drivers/base/component.c index 532a3a5d8f63..1fdbd6ff2058 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -102,11 +102,11 @@ static int component_devices_show(struct seq_file *s, void *data) seq_printf(s, "%-40s %20s\n", "device name", "status"); seq_puts(s, 
"-------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { - struct device *d = (struct device *)match->compare[i].data; + struct component *component = match->compare[i].component; - seq_printf(s, "%-40s %20s\n", dev_name(d), - match->compare[i].component ? - "registered" : "not registered"); + seq_printf(s, "%-40s %20s\n", + component ? dev_name(component->dev) : "(unknown)", + component ? (component->bound ? "bound" : "not bound") : "not registered"); } mutex_unlock(&component_mutex); diff --git a/drivers/base/core.c b/drivers/base/core.c index 42a672456432..dbb0f9130f42 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -718,6 +718,8 @@ static void __device_links_queue_sync_state(struct device *dev, { struct device_link *link; + if (!dev_has_sync_state(dev)) + return; if (dev->state_synced) return; @@ -745,25 +747,31 @@ static void __device_links_queue_sync_state(struct device *dev, /** * device_links_flush_sync_list - Call sync_state() on a list of devices * @list: List of devices to call sync_state() on + * @dont_lock_dev: Device for which lock is already held by the caller * * Calls sync_state() on all the devices that have been queued for it. This - * function is used in conjunction with __device_links_queue_sync_state(). + * function is used in conjunction with __device_links_queue_sync_state(). The + * @dont_lock_dev parameter is useful when this function is called from a + * context where a device lock is already held. */ -static void device_links_flush_sync_list(struct list_head *list) +static void device_links_flush_sync_list(struct list_head *list, + struct device *dont_lock_dev) { struct device *dev, *tmp; list_for_each_entry_safe(dev, tmp, list, links.defer_sync) { list_del_init(&dev->links.defer_sync); - device_lock(dev); + if (dev != dont_lock_dev) + device_lock(dev); if (dev->bus->sync_state) dev->bus->sync_state(dev); else if (dev->driver && dev->driver->sync_state) dev->driver->sync_state(dev); - device_unlock(dev); + if (dev != dont_lock_dev) + device_unlock(dev); put_device(dev); } @@ -801,7 +809,7 @@ void device_links_supplier_sync_state_resume(void) out: device_links_write_unlock(); - device_links_flush_sync_list(&sync_list); + device_links_flush_sync_list(&sync_list, NULL); } static int sync_state_resume_initcall(void) @@ -813,7 +821,7 @@ late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { - if (list_empty(&sup->links.defer_sync)) + if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup)) list_add_tail(&sup->links.defer_sync, &deferred_sync); } @@ -865,6 +873,11 @@ void device_links_driver_bound(struct device *dev) driver_deferred_probe_add(link->consumer); } + if (defer_sync_state_count) + __device_links_supplier_defer_sync(dev); + else + __device_links_queue_sync_state(dev, &sync_list); + list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; @@ -883,7 +896,7 @@ void device_links_driver_bound(struct device *dev) device_links_write_unlock(); - device_links_flush_sync_list(&sync_list); + device_links_flush_sync_list(&sync_list, dev); } static void device_link_drop_managed(struct device_link *link) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index d811e60610d3..b25bcab2a26b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -516,7 +516,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver 
%s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); - WARN_ON(!list_empty(&dev->devres_head)); + if (!list_empty(&dev->devres_head)) { + dev_crit(dev, "Resources present before probing\n"); + return -EBUSY; + } re_probe: dev->driver = drv; diff --git a/drivers/base/platform.c b/drivers/base/platform.c index cf6b6b722e5c..b5ce7b085795 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -48,7 +49,7 @@ EXPORT_SYMBOL_GPL(platform_bus); struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -255,7 +256,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -362,10 +363,10 @@ static void setup_pdev_dma_masks(struct platform_device *pdev) { if (!pdev->dev.coherent_dma_mask) pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - if (!pdev->dma_mask) - pdev->dma_mask = DMA_BIT_MASK(32); - if (!pdev->dev.dma_mask) - pdev->dev.dma_mask = &pdev->dma_mask; + if (!pdev->dev.dma_mask) { + pdev->platform_dma_mask = DMA_BIT_MASK(32); + pdev->dev.dma_mask = &pdev->platform_dma_mask; + } }; /** @@ -501,7 +502,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties); */ int platform_device_add(struct platform_device *pdev) { - int i, ret; + u32 i; + int ret; if (!pdev) return -EINVAL; @@ -569,7 +571,7 @@ int platform_device_add(struct platform_device *pdev) pdev->id = PLATFORM_DEVID_AUTO; } - while (--i >= 0) { + while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); @@ -590,7 +592,7 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - int i; + u32 i; if (!IS_ERR_OR_NULL(pdev)) { device_del(&pdev->dev); @@ -660,20 +662,8 @@ struct platform_device *platform_device_register_full( pdev->dev.of_node_reused = pdevinfo->of_node_reused; if (pdevinfo->dma_mask) { - /* - * This memory isn't freed when the device is put, - * I don't have a nice idea for that though. Conceptually - * dma_mask in struct device should not be a pointer. 
- * See http://thread.gmane.org/gmane.linux.kernel.pci/9081 - */ - pdev->dev.dma_mask = - kmalloc(sizeof(*pdev->dev.dma_mask), GFP_KERNEL); - if (!pdev->dev.dma_mask) - goto err; - - kmemleak_ignore(pdev->dev.dma_mask); - - *pdev->dev.dma_mask = pdevinfo->dma_mask; + pdev->platform_dma_mask = pdevinfo->dma_mask; + pdev->dev.dma_mask = &pdev->platform_dma_mask; pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } @@ -698,7 +688,6 @@ struct platform_device *platform_device_register_full( if (ret) { err: ACPI_COMPANION_SET(&pdev->dev, NULL); - kfree(pdev->dev.dma_mask); platform_device_put(pdev); return ERR_PTR(ret); } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 134a8af51511..0e99a760aebd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -273,10 +273,38 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) device_links_read_unlock(idx); } -static void dpm_wait_for_superior(struct device *dev, bool async) +static bool dpm_wait_for_superior(struct device *dev, bool async) { - dpm_wait(dev->parent, async); + struct device *parent; + + /* + * If the device is resumed asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by reference + * counting the parent once more unless the device has been deleted + * already (in which case return right away). + */ + mutex_lock(&dpm_list_mtx); + + if (!device_pm_initialized(dev)) { + mutex_unlock(&dpm_list_mtx); + return false; + } + + parent = get_device(dev->parent); + + mutex_unlock(&dpm_list_mtx); + + dpm_wait(parent, async); + put_device(parent); + dpm_wait_for_suppliers(dev, async); + + /* + * If the parent's callback has deleted the device, attempting to resume + * it would be invalid, so avoid doing that then. 
+	 */
+	return device_pm_initialized(dev);
 }
 
 static void dpm_wait_for_consumers(struct device *dev, bool async)
@@ -621,7 +649,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
 	if (!dev->power.is_noirq_suspended)
 		goto Out;
 
-	dpm_wait_for_superior(dev, async);
+	if (!dpm_wait_for_superior(dev, async))
+		goto Out;
 
 	skip_resume = dev_pm_may_skip_resume(dev);
 
@@ -829,7 +858,8 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
 	if (!dev->power.is_late_suspended)
 		goto Out;
 
-	dpm_wait_for_superior(dev, async);
+	if (!dpm_wait_for_superior(dev, async))
+		goto Out;
 
 	callback = dpm_subsys_resume_early_cb(dev, state, &info);
 
@@ -944,7 +974,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 		goto Complete;
 	}
 
-	dpm_wait_for_superior(dev, async);
+	if (!dpm_wait_for_superior(dev, async))
+		goto Complete;
+
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 19f57ccfbe1d..59f911e57719 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1488,11 +1488,18 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 
 	WARN_ON(!map->bus);
 
-	/* Check for unwritable registers before we start */
-	for (i = 0; i < val_len / map->format.val_bytes; i++)
-		if (!regmap_writeable(map,
-				     reg + regmap_get_offset(map, i)))
-			return -EINVAL;
+	/* Check for unwritable or noinc registers in range
+	 * before we start
+	 */
+	if (!regmap_writeable_noinc(map, reg)) {
+		for (i = 0; i < val_len / map->format.val_bytes; i++) {
+			unsigned int element =
+				reg + regmap_get_offset(map, i);
+			if (!regmap_writeable(map, element) ||
+			    regmap_writeable_noinc(map, element))
+				return -EINVAL;
+		}
+	}
 
 	if (!map->cache_bypass && map->format.parse_val) {
 		unsigned int ival;
diff --git a/drivers/base/test/test_async_driver_probe.c b/drivers/base/test/test_async_driver_probe.c
index f4b1d8e54daf..3bb7beb127a9 100644
--- a/drivers/base/test/test_async_driver_probe.c
+++ b/drivers/base/test/test_async_driver_probe.c
@@ -44,7 +44,8 @@ static int test_probe(struct platform_device *pdev)
 	 * performing an async init on that node.
 	 */
 	if (dev->driver->probe_type == PROBE_PREFER_ASYNCHRONOUS) {
-		if (dev_to_node(dev) != numa_node_id()) {
+		if (IS_ENABLED(CONFIG_NUMA) &&
+		    dev_to_node(dev) != numa_node_id()) {
 			dev_warn(dev, "NUMA node mismatch %d != %d\n",
 				 dev_to_node(dev), numa_node_id());
 			atomic_inc(&warnings);
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a8730cc4db10..220c5e18aba0 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -473,6 +473,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 	return kobj;
 }
 
+static inline void brd_check_and_reset_par(void)
+{
+	if (unlikely(!max_part))
+		max_part = 1;
+
+	/*
+	 * Make sure 'max_part' divides (1U << MINORBITS) exactly; otherwise,
+	 * it is possible to get the same dev_t when adding partitions.
+	 */
+	if ((1U << MINORBITS) % max_part != 0)
+		max_part = 1UL << fls(max_part);
+
+	if (max_part > DISK_MAX_PARTS) {
+		pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n",
+			DISK_MAX_PARTS, DISK_MAX_PARTS);
+		max_part = DISK_MAX_PARTS;
+	}
+}
+
 static int __init brd_init(void)
 {
 	struct brd_device *brd, *next;
@@ -496,8 +515,7 @@ static int __init brd_init(void)
 	if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
 		return -EIO;
 
-	if (unlikely(!max_part))
-		max_part = 1;
+	brd_check_and_reset_par();
 
 	for (i = 0; i < rd_nr; i++) {
 		brd = brd_alloc(i);
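brd_check_and_reset_par() above rounds any max_part that does not divide 1 << MINORBITS up to the next power of two via fls(), since every power of two up to 2^MINORBITS is a divisor. A runnable demonstration of that rounding; fls() is open-coded here on top of the GCC/Clang __builtin_clz() because it is a kernel helper, and MINORBITS is 20 as in the kernel:

    #include <stdio.h>

    #define MINORBITS 20

    /* open-coded fls(): 1-based index of the highest set bit, 0 for 0 */
    static unsigned int fls_u32(unsigned int x)
    {
            return x ? 32 - (unsigned int)__builtin_clz(x) : 0;
    }

    int main(void)
    {
            unsigned int values[] = { 1, 3, 6, 16, 100 };
            size_t i;

            for (i = 0; i < sizeof(values) / sizeof(values[0]); i++) {
                    unsigned int max_part = values[i];

                    /* same test and rounding as brd_check_and_reset_par() */
                    if ((1U << MINORBITS) % max_part != 0)
                            max_part = 1U << fls_u32(max_part);

                    printf("%3u -> %3u (divides 1<<MINORBITS: %s)\n",
                           values[i], max_part,
                           (1U << MINORBITS) % max_part == 0 ? "yes" : "no");
            }
            return 0;
    }

Non-powers-of-two such as 3, 6, and 100 become 4, 8, and 128, all of which divide 2^20, so partition minor numbers can no longer collide.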
+ */ + if ((1U << MINORBITS) % max_part != 0) + max_part = 1UL << fls(max_part); + + if (max_part > DISK_MAX_PARTS) { + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", + DISK_MAX_PARTS, DISK_MAX_PARTS); + max_part = DISK_MAX_PARTS; + } +} + static int __init brd_init(void) { struct brd_device *brd, *next; @@ -496,8 +515,7 @@ static int __init brd_init(void) if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) return -EIO; - if (unlikely(!max_part)) - max_part = 1; + brd_check_and_reset_par(); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 485865fd0412..f19a03b62365 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -853,14 +853,17 @@ static void reset_fdc_info(int mode) /* selects the fdc and drive, and enables the fdc's input/dma. */ static void set_fdc(int drive) { + unsigned int new_fdc = fdc; + if (drive >= 0 && drive < N_DRIVE) { - fdc = FDC(drive); + new_fdc = FDC(drive); current_drive = drive; } - if (fdc != 1 && fdc != 0) { + if (new_fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } + fdc = new_fdc; set_dor(fdc, ~0, 8); #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index b4607dd96185..78181908f0df 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1265,6 +1265,16 @@ static int nbd_start_device(struct nbd_device *nbd) args = kzalloc(sizeof(*args), GFP_KERNEL); if (!args) { sock_shutdown(nbd); + /* + * If num_connections is m (2 < m), + * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful. + * But NO.(n + 1) failed. We still have n recv threads. + * So, add flush_workqueue here to prevent recv threads + * dropping the last config_refs and trying to destroy + * the workqueue from inside the workqueue. + */ + if (i) + flush_workqueue(nbd->recv_workq); return -ENOMEM; } sk_set_memalloc(config->socks[i]->sock->sk); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2b184563cd32..38dcb39051a7 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2662,7 +2662,7 @@ static int rbd_img_fill_nodata(struct rbd_img_request *img_req, u64 off, u64 len) { struct ceph_file_extent ex = { off, len }; - union rbd_img_fill_iter dummy; + union rbd_img_fill_iter dummy = {}; struct rbd_img_fill_ctx fctx = { .pos_type = OBJ_REQUEST_NODATA, .pos = &dummy, diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 7ffd719d89de..c2ed3e9128e3 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -339,10 +339,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); - blk_mq_stop_hw_queue(hctx); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. 
+ */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); - /* Out of mem doesn't actually happen, since we fall back - * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) return BLK_STS_DEV_RESOURCE; return BLK_STS_IOERR; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4285e75e52c3..1bf4a908a0bd 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -626,7 +626,7 @@ static ssize_t writeback_store(struct device *dev, struct bio bio; struct bio_vec bio_vec; struct page *page; - ssize_t ret; + ssize_t ret = len; int mode; unsigned long blk_idx = 0; @@ -762,7 +762,6 @@ static ssize_t writeback_store(struct device *dev, if (blk_idx) free_block_bdev(zram, blk_idx); - ret = len; __free_page(page); release_init_lock: up_read(&zram->init_lock); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 70e385987d41..a6dad2f6995a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2602,7 +2602,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) * and being processed the events from there then. */ if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { - data->evt_skb = skb_clone(skb, GFP_KERNEL); + data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) goto err_out; } @@ -2867,7 +2867,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname) err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params); if (err < 0) { bt_dev_err(hdev, "Failed to send wmt rst (%d)", err); - return err; + goto err_release_fw; } /* Wait a few moments for firmware activation done */ @@ -3832,6 +3832,10 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); + + err = usb_autopm_get_interface(intf); + if (err < 0) + goto out_free_dev; } if (id->driver_info & BTUSB_AMP) { diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 50200d1c06ea..6095b6df8a81 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -139,7 +139,6 @@ config TEGRA_ACONNECT tristate "Tegra ACONNECT Bus Driver" depends on ARCH_TEGRA_210_SOC depends on OF && PM - select PM_CLK help Driver for the Tegra ACONNECT bus which is used to interface with the devices inside the Audio Processing Engine (APE) for Tegra210. 
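The virtio_blk hunk a little further up distinguishes two submission failures: -ENOSPC means the virtqueue is full and a completion interrupt will make room, so the hardware queue is stopped until then, while -ENOMEM comes from a failed bounce-buffer allocation that no queue event will cure. Both map to BLK_STS_DEV_RESOURCE so blk-mq retries, but only -ENOSPC stops the queue. A sketch of that error-mapping shape with stand-ins for the blk-mq calls (the enum and helper below are illustrative, not the kernel's definitions):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    enum blk_status_sketch { STS_OK, STS_DEV_RESOURCE, STS_IOERR };

    static bool queue_stopped;

    static void stop_hw_queue(void)
    {
            queue_stopped = true;   /* stand-in for blk_mq_stop_hw_queue() */
    }

    static enum blk_status_sketch map_submit_error(int err)
    {
            if (!err)
                    return STS_OK;
            /*
             * Only a full virtqueue (-ENOSPC) is resolved by a completion
             * interrupt, so only that case stops the queue; a failed
             * bounce allocation (-ENOMEM) must be retried without it.
             */
            if (err == -ENOSPC)
                    stop_hw_queue();
            if (err == -ENOMEM || err == -ENOSPC)
                    return STS_DEV_RESOURCE;        /* retried later */
            return STS_IOERR;
    }

    int main(void)
    {
            printf("ENOMEM -> %d, stopped=%d\n",
                   map_submit_error(-ENOMEM), queue_stopped);
            printf("ENOSPC -> %d, stopped=%d\n",
                   map_submit_error(-ENOSPC), queue_stopped);
            return 0;
    }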
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index a07cc19becdb..c78d10ea641f 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -715,9 +715,9 @@ EXPORT_SYMBOL_GPL(fsl_mc_device_remove); struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) { struct fsl_mc_device *mc_bus_dev, *endpoint; - struct fsl_mc_obj_desc endpoint_desc = { 0 }; - struct dprc_endpoint endpoint1 = { 0 }; - struct dprc_endpoint endpoint2 = { 0 }; + struct fsl_mc_obj_desc endpoint_desc = {{ 0 }}; + struct dprc_endpoint endpoint1 = {{ 0 }}; + struct dprc_endpoint endpoint2 = {{ 0 }}; int state, err; mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent); diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 36cf13eee6b8..68413bf9cf87 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -466,7 +466,7 @@ static ssize_t input_read(struct file *file, char __user *buf, size_t len, { struct moxtet *moxtet = file->private_data; u8 bin[TURRIS_MOX_MAX_MODULES]; - u8 hex[sizeof(buf) * 2 + 1]; + u8 hex[sizeof(bin) * 2 + 1]; int ret, n; ret = moxtet_spi_read(moxtet, bin); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index ccb44fe790a7..c42447d5d5a8 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -479,7 +479,7 @@ static void sysc_clkdm_deny_idle(struct sysc *ddata) { struct ti_sysc_platform_data *pdata; - if (ddata->legacy_mode) + if (ddata->legacy_mode || (ddata->cfg.quirks & SYSC_QUIRK_CLKDM_NOAUTO)) return; pdata = dev_get_platdata(ddata->dev); @@ -491,7 +491,7 @@ static void sysc_clkdm_allow_idle(struct sysc *ddata) { struct ti_sysc_platform_data *pdata; - if (ddata->legacy_mode) + if (ddata->legacy_mode || (ddata->cfg.quirks & SYSC_QUIRK_CLKDM_NOAUTO)) return; pdata = dev_get_platdata(ddata->dev); @@ -1251,6 +1251,12 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { /* Quirks that need to be set based on detected module */ SYSC_QUIRK("aess", 0, 0, 0x10, -1, 0x40000000, 0xffffffff, SYSC_MODULE_QUIRK_AESS), + SYSC_QUIRK("dcan", 0x48480000, 0x20, -1, -1, 0xa3170504, 0xffffffff, + SYSC_QUIRK_CLKDM_NOAUTO), + SYSC_QUIRK("dwc3", 0x48880000, 0, 0x10, -1, 0x500a0200, 0xffffffff, + SYSC_QUIRK_CLKDM_NOAUTO), + SYSC_QUIRK("dwc3", 0x488c0000, 0, 0x10, -1, 0x500a0200, 0xffffffff, + SYSC_QUIRK_CLKDM_NOAUTO), SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff, SYSC_MODULE_QUIRK_HDQ1W), SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x0000000a, 0xffffffff, @@ -1400,7 +1406,7 @@ static void sysc_init_revision_quirks(struct sysc *ddata) } /* 1-wire needs module's internal clocks enabled for reset */ -static void sysc_clk_enable_quirk_hdq1w(struct sysc *ddata) +static void sysc_pre_reset_quirk_hdq1w(struct sysc *ddata) { int offset = 0x0c; /* HDQ_CTRL_STATUS */ u16 val; @@ -1488,7 +1494,7 @@ static void sysc_init_module_quirks(struct sysc *ddata) return; if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_HDQ1W) { - ddata->clk_enable_quirk = sysc_clk_enable_quirk_hdq1w; + ddata->clk_disable_quirk = sysc_pre_reset_quirk_hdq1w; return; } diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 9ac6671bb514..f69609b47fef 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -855,7 +855,7 @@ int hpet_alloc(struct hpet_data *hdp) return 0; } - hpetp = kzalloc(struct_size(hpetp, hp_dev, hdp->hd_nirqs - 1), + hpetp = kzalloc(struct_size(hpetp, hp_dev, hdp->hd_nirqs), GFP_KERNEL); if (!hpetp) diff --git a/drivers/char/ipmi/ipmb_dev_int.c b/drivers/char/ipmi/ipmb_dev_int.c index 
1ff4fb1def7c..800532595ea7 100644 --- a/drivers/char/ipmi/ipmb_dev_int.c +++ b/drivers/char/ipmi/ipmb_dev_int.c @@ -253,7 +253,7 @@ static int ipmb_slave_cb(struct i2c_client *client, break; case I2C_SLAVE_WRITE_RECEIVED: - if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg)) + if (ipmb_dev->msg_idx >= sizeof(struct ipmb_msg) - 1) break; buf[++ipmb_dev->msg_idx] = *val; diff --git a/drivers/char/ipmi/ipmi_si_platform.c b/drivers/char/ipmi/ipmi_si_platform.c index c78127ccbc0d..638c693e17ad 100644 --- a/drivers/char/ipmi/ipmi_si_platform.c +++ b/drivers/char/ipmi/ipmi_si_platform.c @@ -194,7 +194,7 @@ static int platform_ipmi_probe(struct platform_device *pdev) else io.slave_addr = slave_addr; - io.irq = platform_get_irq(pdev, 0); + io.irq = platform_get_irq_optional(pdev, 0); if (io.irq > 0) io.irq_setup = ipmi_std_irq_setup; else @@ -378,7 +378,7 @@ static int acpi_ipmi_probe(struct platform_device *pdev) io.irq = tmp; io.irq_setup = acpi_gpe_irq_setup; } else { - int irq = platform_get_irq(pdev, 0); + int irq = platform_get_irq_optional(pdev, 0); if (irq > 0) { io.irq = irq; diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 22c6a2e61236..8ac390c2b514 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -775,10 +775,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); msg = ssif_info->curr_msg; if (msg) { + if (data) { + if (len > IPMI_MAX_MSG_LENGTH) + len = IPMI_MAX_MSG_LENGTH; + memcpy(msg->rsp, data, len); + } else { + len = 0; + } msg->rsp_size = len; - if (msg->rsp_size > IPMI_MAX_MSG_LENGTH) - msg->rsp_size = IPMI_MAX_MSG_LENGTH; - memcpy(msg->rsp, data, msg->rsp_size); ssif_info->curr_msg = NULL; } diff --git a/drivers/char/random.c b/drivers/char/random.c index cda12933a17d..ea1973d35843 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1687,8 +1687,9 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, print_once = true; #endif if (__ratelimit(&unseeded_warning)) - pr_notice("random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); + printk_deferred(KERN_NOTICE "random: %s called from %pS " + "with crng_init=%d\n", func_name, caller, + crng_init); } /* diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 5a0d99d4fec0..9567e5197f74 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -21,9 +21,11 @@ tpm-$(CONFIG_EFI) += eventlog/efi.o tpm-$(CONFIG_OF) += eventlog/of.o obj-$(CONFIG_TCG_TIS_CORE) += tpm_tis_core.o obj-$(CONFIG_TCG_TIS) += tpm_tis.o -obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi_mod.o -tpm_tis_spi_mod-y := tpm_tis_spi.o -tpm_tis_spi_mod-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o + +obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi.o +tpm_tis_spi-y := tpm_tis_spi_main.o +tpm_tis_spi-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o + obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 13696deceae8..760329598b99 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -525,6 +525,8 @@ static int tpm2_init_bank_info(struct tpm_chip *chip, u32 bank_index) return 0; } + bank->crypto_id = HASH_ALGO__LAST; + return tpm2_pcr_read(chip, 0, &digest, &bank->digest_size); } diff --git a/drivers/char/tpm/tpm_tis_spi.c 
b/drivers/char/tpm/tpm_tis_spi_main.c
similarity index 100%
rename from drivers/char/tpm/tpm_tis_spi.c
rename to drivers/char/tpm/tpm_tis_spi_main.c
diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c
index 4f24e46ebe7c..56db949a7b70 100644
--- a/drivers/char/ttyprintk.c
+++ b/drivers/char/ttyprintk.c
@@ -15,10 +15,11 @@
 #include <linux/serial.h>
 #include <linux/tty.h>
 #include <linux/module.h>
+#include <linux/spinlock.h>
 
 struct ttyprintk_port {
 	struct tty_port port;
-	struct mutex port_write_mutex;
+	spinlock_t spinlock;
 };
 
 static struct ttyprintk_port tpk_port;
 
@@ -99,11 +100,12 @@ static int tpk_open(struct tty_struct *tty, struct file *filp)
 static void tpk_close(struct tty_struct *tty, struct file *filp)
 {
 	struct ttyprintk_port *tpkp = tty->driver_data;
+	unsigned long flags;
 
-	mutex_lock(&tpkp->port_write_mutex);
+	spin_lock_irqsave(&tpkp->spinlock, flags);
 	/* flush tpk_printk buffer */
 	tpk_printk(NULL, 0);
-	mutex_unlock(&tpkp->port_write_mutex);
+	spin_unlock_irqrestore(&tpkp->spinlock, flags);
 
 	tty_port_close(&tpkp->port, tty, filp);
 }
@@ -115,13 +117,14 @@ static int tpk_write(struct tty_struct *tty,
 		     const unsigned char *buf, int count)
 {
 	struct ttyprintk_port *tpkp = tty->driver_data;
+	unsigned long flags;
 	int ret;
 
 	/* exclusive use of tpk_printk within this tty */
-	mutex_lock(&tpkp->port_write_mutex);
+	spin_lock_irqsave(&tpkp->spinlock, flags);
 	ret = tpk_printk(buf, count);
-	mutex_unlock(&tpkp->port_write_mutex);
+	spin_unlock_irqrestore(&tpkp->spinlock, flags);
 
 	return ret;
 }
@@ -171,7 +174,7 @@ static int __init ttyprintk_init(void)
 {
 	int ret = -ENOMEM;
 
-	mutex_init(&tpk_port.port_write_mutex);
+	spin_lock_init(&tpk_port.spinlock);
 
 	ttyprintk_driver = tty_alloc_driver(1,
 			TTY_DRIVER_RESET_TERMIOS |
diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c
index 86238d5ecb4d..77398aefeb6d 100644
--- a/drivers/clk/at91/sam9x60.c
+++ b/drivers/clk/at91/sam9x60.c
@@ -47,6 +47,7 @@ static const struct clk_programmable_layout sam9x60_programmable_layout = {
 	.pres_shift = 8,
 	.css_mask = 0x1f,
 	.have_slck_mck = 0,
+	.is_pres_direct = 1,
 };
 
 static const struct clk_pcr_layout sam9x60_pcr_layout = {
diff --git a/drivers/clk/clk-bm1880.c b/drivers/clk/clk-bm1880.c
index 4cd175afce9b..e6d6599d310a 100644
--- a/drivers/clk/clk-bm1880.c
+++ b/drivers/clk/clk-bm1880.c
@@ -474,11 +474,10 @@ static struct bm1880_composite_clock bm1880_composite_clks[] = {
 static unsigned long bm1880_pll_rate_calc(u32 regval, unsigned long parent_rate)
 {
 	u64 numerator;
-	u32 fbdiv, fref, refdiv;
+	u32 fbdiv, refdiv;
 	u32 postdiv1, postdiv2, denominator;
 
 	fbdiv = (regval >> 16) & 0xfff;
-	fref = parent_rate;
 	refdiv = regval & 0x1f;
 	postdiv1 = (regval >> 8) & 0x7;
 	postdiv2 = (regval >> 12) & 0x7;
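The ttyprintk conversion above replaces the port mutex with a spinlock because tpk_write() can be reached from atomic context, where a sleeping lock is not allowed; spin_lock_irqsave() also masks local interrupts so the shared buffer cannot be re-entered from an interrupt handler on the same CPU. A kernel-style sketch of that pattern; the buffer and function names are illustrative and not part of the driver:

    #include <linux/spinlock.h>
    #include <linux/string.h>
    #include <linux/types.h>

    static DEFINE_SPINLOCK(tpk_buf_lock);
    static char tpk_buf[256];
    static size_t tpk_buf_len;

    /*
     * Safe to call from any context, including interrupt context: a mutex
     * could sleep here, while a spinlock with irqsave cannot, and masking
     * local interrupts prevents re-entry from an interrupt handler.
     */
    static void tpk_buf_append(const char *data, size_t len)
    {
            unsigned long flags;

            spin_lock_irqsave(&tpk_buf_lock, flags);
            if (len > sizeof(tpk_buf) - tpk_buf_len)
                    len = sizeof(tpk_buf) - tpk_buf_len;
            memcpy(tpk_buf + tpk_buf_len, data, len);
            tpk_buf_len += len;
            spin_unlock_irqrestore(&tpk_buf_lock, flags);
    }

Since irqsave/irqrestore preserve the caller's interrupt state, the same helper works whether interrupts were enabled or not when it was entered.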
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 772258de2d1f..66f056ac4c15 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -3338,6 +3338,21 @@ static int __clk_core_init(struct clk_core *core)
 		goto out;
 	}
 
+	/*
+	 * optional platform-specific magic
+	 *
+	 * The .init callback is not used by any of the basic clock types, but
+	 * exists for weird hardware that must perform initialization magic.
+	 * Please consider other ways of solving initialization problems before
+	 * using this callback, as its use is discouraged.
+	 *
+	 * If it exists, this callback should be called before any other
+	 * callback of the clock.
+	 */
+	if (core->ops->init)
+		core->ops->init(core->hw);
+
+
 	core->parent = __clk_init_parent(core);
 
 	/*
@@ -3362,17 +3377,6 @@ static int __clk_core_init(struct clk_core *core)
 		core->orphan = true;
 	}
 
-	/*
-	 * optional platform-specific magic
-	 *
-	 * The .init callback is not used by any of the basic clock types, but
-	 * exists for weird hardware that must perform initialization magic.
-	 * Please consider other ways of solving initialization problems before
-	 * using this callback, as its use is discouraged.
-	 */
-	if (core->ops->init)
-		core->ops->init(core->hw);
-
 	/*
 	 * Set clk's accuracy. The preferred method is to use
 	 * .recalc_accuracy. For simple clocks and lazy developers the default
@@ -3732,6 +3736,28 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
 	return ERR_PTR(ret);
 }
 
+/**
+ * dev_or_parent_of_node() - Get device node of @dev or @dev's parent
+ * @dev: Device to get device node of
+ *
+ * Return: device node pointer of @dev, or the device node pointer of
+ * @dev->parent if dev doesn't have a device node, or NULL if neither
+ * @dev or @dev->parent have a device node.
+ */
+static struct device_node *dev_or_parent_of_node(struct device *dev)
+{
+	struct device_node *np;
+
+	if (!dev)
+		return NULL;
+
+	np = dev_of_node(dev);
+	if (!np)
+		np = dev_of_node(dev->parent);
+
+	return np;
+}
+
 /**
  * clk_register - allocate a new clock, register it and return an opaque cookie
  * @dev: device that is registering this clock
@@ -3747,7 +3773,7 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
  */
 struct clk *clk_register(struct device *dev, struct clk_hw *hw)
 {
-	return __clk_register(dev, dev_of_node(dev), hw);
+	return __clk_register(dev, dev_or_parent_of_node(dev), hw);
 }
 EXPORT_SYMBOL_GPL(clk_register);
 
@@ -3763,7 +3789,8 @@ EXPORT_SYMBOL_GPL(clk_register);
 */
 int clk_hw_register(struct device *dev, struct clk_hw *hw)
 {
-	return PTR_ERR_OR_ZERO(__clk_register(dev, dev_of_node(dev), hw));
+	return PTR_ERR_OR_ZERO(__clk_register(dev, dev_or_parent_of_node(dev),
+					      hw));
 }
 EXPORT_SYMBOL_GPL(clk_hw_register);
 
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index bc5bb6ac8636..30ddbc1ced2e 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -54,48 +54,48 @@ extern struct imx_pll14xx_clk imx_1416x_pll;
 extern struct imx_pll14xx_clk imx_1443x_pll;
 
 #define imx_clk_cpu(name, parent_name, div, mux, pll, step) \
-	imx_clk_hw_cpu(name, parent_name, div, mux, pll, step)->clk
+	to_clk(imx_clk_hw_cpu(name, parent_name, div, mux, pll, step))
 
 #define clk_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \
 				cgr_val, clk_gate_flags, lock, share_count) \
-	clk_hw_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \
-				cgr_val, clk_gate_flags, lock, share_count)->clk
+	to_clk(clk_hw_register_gate2(dev, name, parent_name, flags, reg, bit_idx, \
-				cgr_val, clk_gate_flags, lock, share_count))
 
 #define imx_clk_pllv3(type, name, parent_name, base, div_mask) \
-	imx_clk_hw_pllv3(type, name, parent_name, base, div_mask)->clk
+	to_clk(imx_clk_hw_pllv3(type, name, parent_name, base, div_mask))
 
 #define imx_clk_pfd(name, parent_name, reg, idx) \
-	imx_clk_hw_pfd(name, parent_name, reg, idx)->clk
+	to_clk(imx_clk_hw_pfd(name, parent_name, reg, idx))
 
 #define imx_clk_gate_exclusive(name, parent, reg, shift, exclusive_mask) \
-	imx_clk_hw_gate_exclusive(name, parent, reg, shift, exclusive_mask)->clk
+
to_clk(imx_clk_hw_gate_exclusive(name, parent, reg, shift, exclusive_mask)) #define imx_clk_fixed_factor(name, parent, mult, div) \ - imx_clk_hw_fixed_factor(name, parent, mult, div)->clk + to_clk(imx_clk_hw_fixed_factor(name, parent, mult, div)) #define imx_clk_divider2(name, parent, reg, shift, width) \ - imx_clk_hw_divider2(name, parent, reg, shift, width)->clk + to_clk(imx_clk_hw_divider2(name, parent, reg, shift, width)) #define imx_clk_gate_dis(name, parent, reg, shift) \ - imx_clk_hw_gate_dis(name, parent, reg, shift)->clk + to_clk(imx_clk_hw_gate_dis(name, parent, reg, shift)) #define imx_clk_gate2(name, parent, reg, shift) \ - imx_clk_hw_gate2(name, parent, reg, shift)->clk + to_clk(imx_clk_hw_gate2(name, parent, reg, shift)) #define imx_clk_gate2_flags(name, parent, reg, shift, flags) \ - imx_clk_hw_gate2_flags(name, parent, reg, shift, flags)->clk + to_clk(imx_clk_hw_gate2_flags(name, parent, reg, shift, flags)) #define imx_clk_gate2_shared2(name, parent, reg, shift, share_count) \ - imx_clk_hw_gate2_shared2(name, parent, reg, shift, share_count)->clk + to_clk(imx_clk_hw_gate2_shared2(name, parent, reg, shift, share_count)) #define imx_clk_gate3(name, parent, reg, shift) \ - imx_clk_hw_gate3(name, parent, reg, shift)->clk + to_clk(imx_clk_hw_gate3(name, parent, reg, shift)) #define imx_clk_gate4(name, parent, reg, shift) \ - imx_clk_hw_gate4(name, parent, reg, shift)->clk + to_clk(imx_clk_hw_gate4(name, parent, reg, shift)) #define imx_clk_mux(name, reg, shift, width, parents, num_parents) \ - imx_clk_hw_mux(name, reg, shift, width, parents, num_parents)->clk + to_clk(imx_clk_hw_mux(name, reg, shift, width, parents, num_parents)) struct clk *imx_clk_pll14xx(const char *name, const char *parent_name, void __iomem *base, const struct imx_pll14xx_clk *pll_clk); @@ -198,6 +198,13 @@ struct clk_hw *imx_clk_hw_fixup_mux(const char *name, void __iomem *reg, u8 shift, u8 width, const char * const *parents, int num_parents, void (*fixup)(u32 *val)); +static inline struct clk *to_clk(struct clk_hw *hw) +{ + if (IS_ERR_OR_NULL(hw)) + return ERR_CAST(hw); + return hw->clk; +} + static inline struct clk *imx_clk_fixed(const char *name, int rate) { return clk_register_fixed_rate(NULL, name, NULL, 0, rate); diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c index ddb1e5634739..3a5853ca98c6 100644 --- a/drivers/clk/meson/clk-pll.c +++ b/drivers/clk/meson/clk-pll.c @@ -77,6 +77,15 @@ static unsigned long meson_clk_pll_recalc_rate(struct clk_hw *hw, unsigned int m, n, frac; n = meson_parm_read(clk->map, &pll->n); + + /* + * On some HW, N is set to zero on init. This value is invalid as + * it would result in a division by zero. The rate can't be + * calculated in this case + */ + if (n == 0) + return 0; + m = meson_parm_read(clk->map, &pll->m); frac = MESON_PARM_APPLICABLE(&pll->frac) ? 
diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c
index b3af61cc6fb9..d2760a021301 100644
--- a/drivers/clk/meson/g12a.c
+++ b/drivers/clk/meson/g12a.c
@@ -4692,6 +4692,7 @@ static struct clk_regmap *const g12a_clk_regmaps[] = {
 	&g12a_bt656,
 	&g12a_usb1_to_ddr,
 	&g12a_mmc_pclk,
+	&g12a_uart2,
 	&g12a_vpu_intr,
 	&g12a_gic,
 	&g12a_sd_emmc_a_clk0,
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 67e6691e080c..8856ce476ccf 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -1764,8 +1764,11 @@ static struct clk_regmap meson8b_hdmi_sys = {
 
 /*
  * The MALI IP is clocked by two identical clocks (mali_0 and mali_1)
- * muxed by a glitch-free switch on Meson8b and Meson8m2. Meson8 only
- * has mali_0 and no glitch-free mux.
+ * muxed by a glitch-free switch on Meson8b and Meson8m2. The CCF can
+ * actually manage this glitch-free mux because it updates each clock
+ * tree from top to bottom and switches to the "inactive" clock when
+ * CLK_SET_RATE_GATE is set.
+ * Meson8 only has mali_0 and no glitch-free mux.
 */
 static const struct clk_hw *meson8b_mali_0_1_parent_hws[] = {
 	&meson8b_xtal.hw,
@@ -1830,7 +1833,7 @@ static struct clk_regmap meson8b_mali_0 = {
 			&meson8b_mali_0_div.hw
 		},
 		.num_parents = 1,
-		.flags = CLK_SET_RATE_PARENT,
+		.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT,
 	},
 };
 
@@ -1885,7 +1888,7 @@ static struct clk_regmap meson8b_mali_1 = {
 			&meson8b_mali_1_div.hw
 		},
 		.num_parents = 1,
-		.flags = CLK_SET_RATE_PARENT,
+		.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT,
 	},
 };
 
diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index 8f4b9bec2956..cecdb07ce13b 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c
@@ -217,6 +217,9 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f,
 
 	clk_flags = clk_hw_get_flags(hw);
 	p = clk_hw_get_parent_by_index(hw, index);
+	if (!p)
+		return -EINVAL;
+
 	if (clk_flags & CLK_SET_RATE_PARENT) {
 		rate = f->freq;
 		if (f->pre_div) {
@@ -952,7 +955,7 @@ static void clk_rcg2_dfs_populate_freq(struct clk_hw *hw, unsigned int l,
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
 	struct clk_hw *p;
 	unsigned long prate = 0;
-	u32 val, mask, cfg, mode;
+	u32 val, mask, cfg, mode, src;
 	int i, num_parents;
 
 	regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + SE_PERF_DFSR(l), &cfg);
@@ -962,12 +965,12 @@ static void clk_rcg2_dfs_populate_freq(struct clk_hw *hw, unsigned int l,
 	if (cfg & mask)
 		f->pre_div = cfg & mask;
 
-	cfg &= CFG_SRC_SEL_MASK;
-	cfg >>= CFG_SRC_SEL_SHIFT;
+	src = cfg & CFG_SRC_SEL_MASK;
+	src >>= CFG_SRC_SEL_SHIFT;
 
 	num_parents = clk_hw_get_num_parents(hw);
 	for (i = 0; i < num_parents; i++) {
-		if (cfg == rcg->parent_map[i].cfg) {
+		if (src == rcg->parent_map[i].cfg) {
 			f->src = rcg->parent_map[i].src;
 			p = clk_hw_get_parent_by_index(&rcg->clkr.hw, i);
 			prate = clk_hw_get_rate(p);
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
index 7ed313ad6e43..d9e17b91c68e 100644
--- a/drivers/clk/qcom/clk-rpmh.c
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -481,9 +481,9 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id clk_rpmh_match_table[] = {
+	{ .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180},
 	{ .compatible = "qcom,sdm845-rpmh-clk", .data = &clk_rpmh_sdm845},
 	{ .compatible = "qcom,sm8150-rpmh-clk", .data = &clk_rpmh_sm8150},
-	{ .compatible = "qcom,sc7180-rpmh-clk", .data = &clk_rpmh_sc7180},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, clk_rpmh_match_table);
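The clk-rcg2.c hunk above introduces a separate 'src' variable because the old code masked and shifted 'cfg' in place, destroying the raw register value that is still needed afterwards for the other fields. A runnable illustration of destructive versus non-destructive bitfield extraction; the mask and shift values below are invented and do not match the real RCG CFG register layout:

    #include <stdio.h>

    /* illustrative layout only */
    #define CFG_SRC_SEL_SHIFT   8
    #define CFG_SRC_SEL_MASK    (0x7u << CFG_SRC_SEL_SHIFT)
    #define CFG_MODE_SHIFT      12
    #define CFG_MODE_MASK       (0x3u << CFG_MODE_SHIFT)

    int main(void)
    {
            unsigned int cfg = (5u << CFG_SRC_SEL_SHIFT) |
                               (2u << CFG_MODE_SHIFT) | 0x1f;
            unsigned int src, mode;

            /*
             * The buggy pattern masked and shifted 'cfg' itself:
             *
             *      cfg &= CFG_SRC_SEL_MASK;
             *      cfg >>= CFG_SRC_SEL_SHIFT;
             *
             * after which any later read of another field (here, the
             * mode bits) would see garbage.  Extracting each field into
             * its own variable keeps the raw register value intact.
             */
            src = (cfg & CFG_SRC_SEL_MASK) >> CFG_SRC_SEL_SHIFT;
            mode = (cfg & CFG_MODE_MASK) >> CFG_MODE_SHIFT;

            printf("raw=0x%x src=%u mode=%u\n", cfg, src, mode);
            return 0;
    }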
a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 930fa4a4c52a..e5c3db11bf26 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -648,6 +648,7 @@ static const struct rpm_smd_clk_desc rpm_clk_qcs404 = { }; /* msm8998 */ +DEFINE_CLK_SMD_RPM(msm8998, bimc_clk, bimc_a_clk, QCOM_SMD_RPM_MEM_CLK, 0); DEFINE_CLK_SMD_RPM(msm8998, pcnoc_clk, pcnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 0); DEFINE_CLK_SMD_RPM(msm8998, snoc_clk, snoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 1); DEFINE_CLK_SMD_RPM(msm8998, cnoc_clk, cnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 2); @@ -671,6 +672,8 @@ DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, rf_clk2_pin, rf_clk2_a_pin, 5); DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, rf_clk3, rf_clk3_a, 6); DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, rf_clk3_pin, rf_clk3_a_pin, 6); static struct clk_smd_rpm *msm8998_clks[] = { + [RPM_SMD_BIMC_CLK] = &msm8998_bimc_clk, + [RPM_SMD_BIMC_A_CLK] = &msm8998_bimc_a_clk, [RPM_SMD_PCNOC_CLK] = &msm8998_pcnoc_clk, [RPM_SMD_PCNOC_A_CLK] = &msm8998_pcnoc_a_clk, [RPM_SMD_SNOC_CLK] = &msm8998_snoc_clk, diff --git a/drivers/clk/qcom/gcc-msm8998.c b/drivers/clk/qcom/gcc-msm8998.c index cf31b5d03270..df1d7056436c 100644 --- a/drivers/clk/qcom/gcc-msm8998.c +++ b/drivers/clk/qcom/gcc-msm8998.c @@ -1996,6 +1996,19 @@ static struct clk_branch gcc_gp3_clk = { }, }; +static struct clk_branch gcc_bimc_gfx_clk = { + .halt_reg = 0x46040, + .halt_check = BRANCH_HALT, + .clkr = { + .enable_reg = 0x46040, + .enable_mask = BIT(0), + .hw.init = &(struct clk_init_data){ + .name = "gcc_bimc_gfx_clk", + .ops = &clk_branch2_ops, + }, + }, +}; + static struct clk_branch gcc_gpu_bimc_gfx_clk = { .halt_reg = 0x71010, .halt_check = BRANCH_HALT, @@ -2810,6 +2823,7 @@ static struct clk_regmap *gcc_msm8998_clocks[] = { [GCC_GP1_CLK] = &gcc_gp1_clk.clkr, [GCC_GP2_CLK] = &gcc_gp2_clk.clkr, [GCC_GP3_CLK] = &gcc_gp3_clk.clkr, + [GCC_BIMC_GFX_CLK] = &gcc_bimc_gfx_clk.clkr, [GCC_GPU_BIMC_GFX_CLK] = &gcc_gpu_bimc_gfx_clk.clkr, [GCC_GPU_BIMC_GFX_SRC_CLK] = &gcc_gpu_bimc_gfx_src_clk.clkr, [GCC_GPU_CFG_AHB_CLK] = &gcc_gpu_cfg_ahb_clk.clkr, diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c index c97b647db9b6..488f8b3980c5 100644 --- a/drivers/clk/renesas/rcar-gen3-cpg.c +++ b/drivers/clk/renesas/rcar-gen3-cpg.c @@ -470,7 +470,8 @@ static struct clk * __init cpg_rpc_clk_register(const char *name, clk = clk_register_composite(NULL, name, &parent_name, 1, NULL, NULL, &rpc->div.hw, &clk_divider_ops, - &rpc->gate.hw, &clk_gate_ops, 0); + &rpc->gate.hw, &clk_gate_ops, + CLK_SET_RATE_PARENT); if (IS_ERR(clk)) { kfree(rpc); return clk; @@ -506,7 +507,8 @@ static struct clk * __init cpg_rpcd2_clk_register(const char *name, clk = clk_register_composite(NULL, name, &parent_name, 1, NULL, NULL, &rpcd2->fixed.hw, &clk_fixed_factor_ops, - &rpcd2->gate.hw, &clk_gate_ops, 0); + &rpcd2->gate.hw, &clk_gate_ops, + CLK_SET_RATE_PARENT); if (IS_ERR(clk)) kfree(rpcd2); diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 49bd7a4c015c..5f66bf879772 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -921,11 +921,26 @@ static const struct sunxi_ccu_desc sun50i_a64_ccu_desc = { .num_resets = ARRAY_SIZE(sun50i_a64_ccu_resets), }; +static struct ccu_pll_nb sun50i_a64_pll_cpu_nb = { + .common = &pll_cpux_clk.common, + /* copy from pll_cpux_clk */ + .enable = BIT(31), + .lock = BIT(28), +}; + +static struct ccu_mux_nb sun50i_a64_cpu_nb = { + .common = &cpux_clk.common, 
+ .cm = &cpux_clk.mux, + .delay_us = 1, /* > 8 clock cycles at 24 MHz */ + .bypass_index = 1, /* index of 24 MHz oscillator */ +}; + static int sun50i_a64_ccu_probe(struct platform_device *pdev) { struct resource *res; void __iomem *reg; u32 val; + int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(&pdev->dev, res); @@ -939,7 +954,18 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev) writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG); - return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + if (ret) + return ret; + + /* Gate then ungate PLL CPU after any rate changes */ + ccu_pll_notifier_register(&sun50i_a64_pll_cpu_nb); + + /* Reparent CPU during PLL CPU rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_a64_cpu_nb); + + return 0; } static const struct of_device_id sun50i_a64_ccu_ids[] = { diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 0d07c0ba49b6..2b2a3b81c16b 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -777,7 +777,11 @@ static struct tegra_periph_init_data gate_clks[] = { GATE("ahbdma", "hclk", 33, 0, tegra_clk_ahbdma, 0), GATE("apbdma", "pclk", 34, 0, tegra_clk_apbdma, 0), GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0), - GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0), + /* + * Critical for RAM re-repair operation, which must occur on resume + * from LP1 system suspend and as part of CCPLEX cluster switching. + */ + GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL), GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0), GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0), GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0), diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index 5f46782cebeb..b656ba2abcf7 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -405,7 +405,7 @@ static const struct omap_clkctrl_bit_data dra7_gmac_bit_data[] __initconst = { }; static const struct omap_clkctrl_reg_data dra7_gmac_clkctrl_regs[] __initconst = { - { DRA7_GMAC_GMAC_CLKCTRL, dra7_gmac_bit_data, CLKF_SW_SUP, "dpll_gmac_ck" }, + { DRA7_GMAC_GMAC_CLKCTRL, dra7_gmac_bit_data, CLKF_SW_SUP, "gmac_main_clk" }, { 0 }, }; diff --git a/drivers/clk/uniphier/clk-uniphier-peri.c b/drivers/clk/uniphier/clk-uniphier-peri.c index 9caa52944b1c..3e32db9dad81 100644 --- a/drivers/clk/uniphier/clk-uniphier-peri.c +++ b/drivers/clk/uniphier/clk-uniphier-peri.c @@ -18,8 +18,8 @@ #define UNIPHIER_PERI_CLK_FI2C(idx, ch) \ UNIPHIER_CLK_GATE("i2c" #ch, (idx), "i2c", 0x24, 24 + (ch)) -#define UNIPHIER_PERI_CLK_SCSSI(idx) \ - UNIPHIER_CLK_GATE("scssi", (idx), "spi", 0x20, 17) +#define UNIPHIER_PERI_CLK_SCSSI(idx, ch) \ + UNIPHIER_CLK_GATE("scssi" #ch, (idx), "spi", 0x20, 17 + (ch)) #define UNIPHIER_PERI_CLK_MCSSI(idx) \ UNIPHIER_CLK_GATE("mcssi", (idx), "spi", 0x24, 14) @@ -35,7 +35,7 @@ const struct uniphier_clk_data uniphier_ld4_peri_clk_data[] = { UNIPHIER_PERI_CLK_I2C(6, 2), UNIPHIER_PERI_CLK_I2C(7, 3), UNIPHIER_PERI_CLK_I2C(8, 4), - UNIPHIER_PERI_CLK_SCSSI(11), + UNIPHIER_PERI_CLK_SCSSI(11, 0), { /* sentinel */ } }; @@ -51,7 +51,10 @@ const struct uniphier_clk_data uniphier_pro4_peri_clk_data[] = { UNIPHIER_PERI_CLK_FI2C(8, 4), UNIPHIER_PERI_CLK_FI2C(9, 5), 
UNIPHIER_PERI_CLK_FI2C(10, 6), - UNIPHIER_PERI_CLK_SCSSI(11), - UNIPHIER_PERI_CLK_MCSSI(12), + UNIPHIER_PERI_CLK_SCSSI(11, 0), + UNIPHIER_PERI_CLK_SCSSI(12, 1), + UNIPHIER_PERI_CLK_SCSSI(13, 2), + UNIPHIER_PERI_CLK_SCSSI(14, 3), + UNIPHIER_PERI_CLK_MCSSI(15), { /* sentinel */ } }; diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 2b196cbfadb6..b235f446ee50 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -121,7 +121,7 @@ static int __init bcm2835_timer_init(struct device_node *node) ret = setup_irq(irq, &timer->act); if (ret) { pr_err("Can't set up timer IRQ\n"); - goto err_iounmap; + goto err_timer_free; } clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff); @@ -130,6 +130,9 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; +err_timer_free: + kfree(timer); + err_iounmap: iounmap(base); return ret; diff --git a/drivers/clocksource/timer-davinci.c b/drivers/clocksource/timer-davinci.c index 62745c962049..e421946a91c5 100644 --- a/drivers/clocksource/timer-davinci.c +++ b/drivers/clocksource/timer-davinci.c @@ -302,10 +302,6 @@ int __init davinci_timer_register(struct clk *clk, return rv; } - clockevents_config_and_register(&clockevent->dev, tick_rate, - DAVINCI_TIMER_MIN_DELTA, - DAVINCI_TIMER_MAX_DELTA); - davinci_clocksource.dev.rating = 300; davinci_clocksource.dev.read = davinci_clocksource_read; davinci_clocksource.dev.mask = @@ -323,6 +319,10 @@ int __init davinci_timer_register(struct clk *clk, davinci_clocksource_init_tim34(base); } + clockevents_config_and_register(&clockevent->dev, tick_rate, + DAVINCI_TIMER_MIN_DELTA, + DAVINCI_TIMER_MAX_DELTA); + rv = clocksource_register_hz(&davinci_clocksource.dev, tick_rate); if (rv) { pr_err("Unable to register clocksource"); diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8d8da763adc5..8910fd1ae3c6 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -217,7 +217,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, return ret; } -static int cppc_verify_policy(struct cpufreq_policy *policy) +static int cppc_verify_policy(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); return 0; diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index cd53272e2fa2..f7a7bcf6f52e 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -291,7 +291,7 @@ static int nforce2_target(struct cpufreq_policy *policy, * nforce2_verify - verifies a new CPUFreq policy * @policy: new policy */ -static int nforce2_verify(struct cpufreq_policy *policy) +static int nforce2_verify(struct cpufreq_policy_data *policy) { unsigned int fsb_pol_max; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 77114a3897fb..b60d349e3b1e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -74,6 +74,9 @@ static void cpufreq_exit_governor(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); static void cpufreq_stop_governor(struct cpufreq_policy *policy); static void cpufreq_governor_limits(struct cpufreq_policy *policy); +static int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_governor *new_gov, + unsigned int new_pol); /** * Two notifier lists: the "policy" list is involved in the @@ -616,25 +619,22 @@ static struct cpufreq_governor *find_governor(const char *str_governor) return 
NULL; } -static int cpufreq_parse_policy(char *str_governor, - struct cpufreq_policy *policy) +static unsigned int cpufreq_parse_policy(char *str_governor) { - if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_PERFORMANCE; - return 0; - } - if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) { - policy->policy = CPUFREQ_POLICY_POWERSAVE; - return 0; - } - return -EINVAL; + if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) + return CPUFREQ_POLICY_PERFORMANCE; + + if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) + return CPUFREQ_POLICY_POWERSAVE; + + return CPUFREQ_POLICY_UNKNOWN; } /** * cpufreq_parse_governor - parse a governor string only for has_target() + * @str_governor: Governor name. */ -static int cpufreq_parse_governor(char *str_governor, - struct cpufreq_policy *policy) +static struct cpufreq_governor *cpufreq_parse_governor(char *str_governor) { struct cpufreq_governor *t; @@ -648,7 +648,7 @@ static int cpufreq_parse_governor(char *str_governor, ret = request_module("cpufreq_%s", str_governor); if (ret) - return -EINVAL; + return NULL; mutex_lock(&cpufreq_governor_mutex); @@ -659,12 +659,7 @@ static int cpufreq_parse_governor(char *str_governor, mutex_unlock(&cpufreq_governor_mutex); - if (t) { - policy->governor = t; - return 0; - } - - return -EINVAL; + return t; } /** @@ -765,28 +760,33 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) static ssize_t store_scaling_governor(struct cpufreq_policy *policy, const char *buf, size_t count) { + char str_governor[16]; int ret; - char str_governor[16]; - struct cpufreq_policy new_policy; - - memcpy(&new_policy, policy, sizeof(*policy)); ret = sscanf(buf, "%15s", str_governor); if (ret != 1) return -EINVAL; if (cpufreq_driver->setpolicy) { - if (cpufreq_parse_policy(str_governor, &new_policy)) + unsigned int new_pol; + + new_pol = cpufreq_parse_policy(str_governor); + if (!new_pol) return -EINVAL; + + ret = cpufreq_set_policy(policy, NULL, new_pol); } else { - if (cpufreq_parse_governor(str_governor, &new_policy)) + struct cpufreq_governor *new_gov; + + new_gov = cpufreq_parse_governor(str_governor); + if (!new_gov) return -EINVAL; - } - ret = cpufreq_set_policy(policy, &new_policy); + ret = cpufreq_set_policy(policy, new_gov, + CPUFREQ_POLICY_UNKNOWN); - if (new_policy.governor) - module_put(new_policy.governor->owner); + module_put(new_gov->owner); + } return ret ? ret : count; } @@ -1053,40 +1053,41 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void) static int cpufreq_init_policy(struct cpufreq_policy *policy) { - struct cpufreq_governor *gov = NULL, *def_gov = NULL; - struct cpufreq_policy new_policy; - - memcpy(&new_policy, policy, sizeof(*policy)); - - def_gov = cpufreq_default_governor(); + struct cpufreq_governor *def_gov = cpufreq_default_governor(); + struct cpufreq_governor *gov = NULL; + unsigned int pol = CPUFREQ_POLICY_UNKNOWN; if (has_target()) { - /* - * Update governor of new_policy to the governor used before - * hotplug - */ + /* Update policy governor to the one used before hotplug. */ gov = find_governor(policy->last_governor); if (gov) { pr_debug("Restoring governor %s for cpu %d\n", - policy->governor->name, policy->cpu); - } else { - if (!def_gov) - return -ENODATA; + policy->governor->name, policy->cpu); + } else if (def_gov) { gov = def_gov; + } else { + return -ENODATA; } - new_policy.governor = gov; } else { /* Use the default policy if there is no last_policy. 
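
The reworked cpufreq_parse_policy() can be truth-tested directly only because CPUFREQ_POLICY_UNKNOWN is defined as 0 in include/linux/cpufreq.h; the store_scaling_governor() hunk above depends on exactly that. A condensed sketch of the calling pattern for a setpolicy-style driver (helper names as in drivers/cpufreq/cpufreq.c):

static int example_store_policy(struct cpufreq_policy *policy,
				char *str_governor)
{
	unsigned int new_pol = cpufreq_parse_policy(str_governor);

	/* CPUFREQ_POLICY_UNKNOWN is 0, so !new_pol means the parse failed */
	if (!new_pol)
		return -EINVAL;

	/* setpolicy-style drivers take a policy value, not a governor */
	return cpufreq_set_policy(policy, NULL, new_pol);
}
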
*/ if (policy->last_policy) { - new_policy.policy = policy->last_policy; - } else { - if (!def_gov) - return -ENODATA; - cpufreq_parse_policy(def_gov->name, &new_policy); + pol = policy->last_policy; + } else if (def_gov) { + pol = cpufreq_parse_policy(def_gov->name); + /* + * In case the default governor is neither "performance" + * nor "powersave", fall back to the initial policy + * value set by the driver. + */ + if (pol == CPUFREQ_POLICY_UNKNOWN) + pol = policy->policy; } + if (pol != CPUFREQ_POLICY_PERFORMANCE && + pol != CPUFREQ_POLICY_POWERSAVE) + return -ENODATA; } - return cpufreq_set_policy(policy, &new_policy); + return cpufreq_set_policy(policy, gov, pol); } static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) @@ -1114,13 +1115,10 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cp void refresh_frequency_limits(struct cpufreq_policy *policy) { - struct cpufreq_policy new_policy; - if (!policy_is_inactive(policy)) { - new_policy = *policy; pr_debug("updating policy for CPU %u\n", policy->cpu); - cpufreq_set_policy(policy, &new_policy); + cpufreq_set_policy(policy, policy->governor, policy->policy); } } EXPORT_SYMBOL(refresh_frequency_limits); @@ -2364,46 +2362,49 @@ EXPORT_SYMBOL(cpufreq_get_policy); /** * cpufreq_set_policy - Modify cpufreq policy parameters. * @policy: Policy object to modify. - * @new_policy: New policy data. + * @new_gov: Policy governor pointer. + * @new_pol: Policy value (for drivers with built-in governors). * - * Pass @new_policy to the cpufreq driver's ->verify() callback. Next, copy the - * min and max parameters of @new_policy to @policy and either invoke the - * driver's ->setpolicy() callback (if present) or carry out a governor update - * for @policy. That is, run the current governor's ->limits() callback (if the - * governor field in @new_policy points to the same object as the one in - * @policy) or replace the governor for @policy with the new one stored in - * @new_policy. + * Invoke the cpufreq driver's ->verify() callback to sanity-check the frequency + * limits to be set for the policy, update @policy with the verified limits + * values and either invoke the driver's ->setpolicy() callback (if present) or + * carry out a governor update for @policy. That is, run the current governor's + * ->limits() callback (if @new_gov points to the same object as the one in + * @policy) or replace the governor for @policy with @new_gov. * * The cpuinfo part of @policy is not updated by this function. */ -int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy) +static int cpufreq_set_policy(struct cpufreq_policy *policy, + struct cpufreq_governor *new_gov, + unsigned int new_pol) { + struct cpufreq_policy_data new_data; struct cpufreq_governor *old_gov; int ret; - pr_debug("setting new policy for CPU %u: %u - %u kHz\n", - new_policy->cpu, new_policy->min, new_policy->max); - - memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - + memcpy(&new_data.cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); + new_data.freq_table = policy->freq_table; + new_data.cpu = policy->cpu;
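
With the reworked signature, callers spell out the governor and the policy value instead of building a scratch struct cpufreq_policy. Both idioms, condensed from the hunks in this file:

/* re-evaluate limits only: pass the current governor and value back in */
cpufreq_set_policy(policy, policy->governor, policy->policy);

/* switch governors: the policy value is unused by has_target() drivers */
cpufreq_set_policy(policy, new_gov, CPUFREQ_POLICY_UNKNOWN);

/* * PM QoS framework collects all the requests from users and provide us * the final aggregated value here. 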
*/ - new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); - new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); + new_data.min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN); + new_data.max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX); + + pr_debug("setting new policy for CPU %u: %u - %u kHz\n", + new_data.cpu, new_data.min, new_data.max); /* * Verify that the CPU speed can be set within these limits and make sure * that min <= max. */ - ret = cpufreq_driver->verify(new_policy); + ret = cpufreq_driver->verify(&new_data); if (ret) return ret; - policy->min = new_policy->min; - policy->max = new_policy->max; + policy->min = new_data.min; + policy->max = new_data.max; trace_cpu_frequency_limits(policy); policy->cached_target_freq = UINT_MAX; @@ -2412,12 +2413,12 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min, policy->max); if (cpufreq_driver->setpolicy) { - policy->policy = new_policy->policy; + policy->policy = new_pol; pr_debug("setting range\n"); return cpufreq_driver->setpolicy(policy); } - if (new_policy->governor == policy->governor) { + if (new_gov == policy->governor) { pr_debug("governor limits update\n"); cpufreq_governor_limits(policy); return 0; @@ -2434,7 +2435,7 @@ int cpufreq_set_policy(struct cpufreq_policy *policy, } /* start new governor */ - policy->governor = new_policy->governor; + policy->governor = new_gov; ret = cpufreq_init_governor(policy); if (!ret) { ret = cpufreq_start_governor(policy); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 737ff3b9c2c0..b5bc5a1b6de7 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -28,8 +28,8 @@ struct cs_dbs_tuners { }; /* Conservative governor macros */ -#define DEF_FREQUENCY_UP_THRESHOLD (80) -#define DEF_FREQUENCY_DOWN_THRESHOLD (20) +#define DEF_FREQUENCY_UP_THRESHOLD (63) +#define DEF_FREQUENCY_DOWN_THRESHOLD (26) #define DEF_FREQUENCY_STEP (5) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 82a4d37ddecb..1130e0f5db72 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -18,7 +18,7 @@ #include "cpufreq_ondemand.h" /* On-demand governor macros */ -#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_UP_THRESHOLD (63) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_UP_THRESHOLD (95) @@ -127,7 +127,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) } /* - * Every sampling_rate, we check, if current idle time is less than 20% + * Every sampling_rate, we check, if current idle time is less than 37% * (default), then we try to increase frequency. Else, we adjust the frequency * proportional to load. 
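
The retuned defaults shift the governors' trip points; for ondemand, an up_threshold of 63 means the governor jumps to the maximum frequency once load exceeds 63%, i.e. once idle time drops below the 37% the updated comment mentions. Condensed from od_update() in cpufreq_ondemand.c (a sketch, not the full function):

static void example_od_update(struct cpufreq_policy *policy,
			      struct dbs_data *dbs_data, unsigned int load)
{
	unsigned int min_f = policy->cpuinfo.min_freq;
	unsigned int max_f = policy->cpuinfo.max_freq;

	if (load > dbs_data->up_threshold) {
		/* idle time below 37% with the new default: go to max */
		dbs_freq_increase(policy, policy->max);
	} else {
		/* otherwise request a frequency proportional to the load */
		unsigned int freq_next = min_f + load * (max_f - min_f) / 100;

		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
	}
}
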
*/ diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index ded427e0a488..e117b0059123 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -60,7 +60,7 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, return 0; } -int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, struct cpufreq_frequency_table *table) { struct cpufreq_frequency_table *pos; @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); * Generic routine to verify policy & frequency table, requires driver to set * policy->freq_table prior to it. */ -int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) +int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy) { if (!policy->freq_table) return -ENODEV; diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c index e97b5733aa24..75b3ef7ec679 100644 --- a/drivers/cpufreq/gx-suspmod.c +++ b/drivers/cpufreq/gx-suspmod.c @@ -328,7 +328,7 @@ static void gx_set_cpuspeed(struct cpufreq_policy *policy, unsigned int khz) * for the hardware supported by the driver. */ -static int cpufreq_gx_verify(struct cpufreq_policy *policy) +static int cpufreq_gx_verify(struct cpufreq_policy_data *policy) { unsigned int tmp_freq = 0; u8 tmp1, tmp2; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index d2fa3e9ccd97..e23e51230b89 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2036,8 +2036,9 @@ static int intel_pstate_get_max_freq(struct cpudata *cpu) cpu->pstate.max_freq : cpu->pstate.turbo_freq; } -static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, - struct cpudata *cpu) +static void intel_pstate_update_perf_limits(struct cpudata *cpu, + unsigned int policy_min, + unsigned int policy_max) { int max_freq = intel_pstate_get_max_freq(cpu); int32_t max_policy_perf, min_policy_perf; @@ -2056,18 +2057,17 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, turbo_max = cpu->pstate.turbo_pstate; } - max_policy_perf = max_state * policy->max / max_freq; - if (policy->max == policy->min) { + max_policy_perf = max_state * policy_max / max_freq; + if (policy_max == policy_min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = max_state * policy->min / max_freq; + min_policy_perf = max_state * policy_min / max_freq; min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", - policy->cpu, max_state, - min_policy_perf, max_policy_perf); + cpu->cpu, max_state, min_policy_perf, max_policy_perf); /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { @@ -2081,7 +2081,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, global_min = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); global_min = clamp_t(int32_t, global_min, 0, global_max); - pr_debug("cpu:%d global_min:%d global_max:%d\n", policy->cpu, + pr_debug("cpu:%d global_min:%d global_max:%d\n", cpu->cpu, global_min, global_max); cpu->min_perf_ratio = max(min_policy_perf, global_min); @@ -2094,7 +2094,7 @@ static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, cpu->max_perf_ratio); } - pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", policy->cpu, + pr_debug("cpu:%d max_perf_ratio:%d min_perf_ratio:%d\n", cpu->cpu, cpu->max_perf_ratio, 
cpu->min_perf_ratio); } @@ -2114,7 +2114,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); - intel_pstate_update_perf_limits(policy, cpu); + intel_pstate_update_perf_limits(cpu, policy->min, policy->max); if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* @@ -2143,8 +2143,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) return 0; } -static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, - struct cpudata *cpu) +static void intel_pstate_adjust_policy_max(struct cpudata *cpu, + struct cpufreq_policy_data *policy) { if (!hwp_active && cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && @@ -2155,7 +2155,7 @@ static void intel_pstate_adjust_policy_max(struct cpufreq_policy *policy, } } -static int intel_pstate_verify_policy(struct cpufreq_policy *policy) +static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; @@ -2163,11 +2163,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, intel_pstate_get_max_freq(cpu)); - if (policy->policy != CPUFREQ_POLICY_POWERSAVE && - policy->policy != CPUFREQ_POLICY_PERFORMANCE) - return -EINVAL; - - intel_pstate_adjust_policy_max(policy, cpu); + intel_pstate_adjust_policy_max(cpu, policy); return 0; } @@ -2268,7 +2264,7 @@ static struct cpufreq_driver intel_pstate = { .name = "intel_pstate", }; -static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) +static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; @@ -2276,9 +2272,9 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, intel_pstate_get_max_freq(cpu)); - intel_pstate_adjust_policy_max(policy, cpu); + intel_pstate_adjust_policy_max(cpu, policy); - intel_pstate_update_perf_limits(policy, cpu); + intel_pstate_update_perf_limits(cpu, policy->min, policy->max); return 0; } diff --git a/drivers/cpufreq/longrun.c b/drivers/cpufreq/longrun.c index 64b8689f7a4a..0b08be8bff76 100644 --- a/drivers/cpufreq/longrun.c +++ b/drivers/cpufreq/longrun.c @@ -122,7 +122,7 @@ static int longrun_set_policy(struct cpufreq_policy *policy) * Validates a new CPUFreq policy. This function has to be called with * cpufreq_driver locked. 
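
Every ->verify() conversion in this series follows the same template: the callback now receives a struct cpufreq_policy_data, which carries only the verifiable fields (cpu, min, max, cpuinfo, freq_table), so a verify path can no longer read or clobber policy state such as the governor. A minimal converted callback (sketch, using the stock limits helper):

#include <linux/cpufreq.h>

static int example_verify(struct cpufreq_policy_data *policy)
{
	/* clamp policy->min/max into [cpuinfo.min_freq, cpuinfo.max_freq] */
	cpufreq_verify_within_cpu_limits(policy);
	return 0;
}
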
*/ -static int longrun_verify_policy(struct cpufreq_policy *policy) +static int longrun_verify_policy(struct cpufreq_policy_data *policy) { if (!policy) return -EINVAL; @@ -130,10 +130,6 @@ static int longrun_verify_policy(struct cpufreq_policy *policy) policy->cpu = 0; cpufreq_verify_within_cpu_limits(policy); - if ((policy->policy != CPUFREQ_POLICY_POWERSAVE) && - (policy->policy != CPUFREQ_POLICY_PERFORMANCE)) - return -EINVAL; - return 0; } diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index fdc767fdbe6a..f90273006553 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -109,7 +109,7 @@ struct pcc_cpu { static struct pcc_cpu __percpu *pcc_cpu_info; -static int pcc_cpufreq_verify(struct cpufreq_policy *policy) +static int pcc_cpufreq_verify(struct cpufreq_policy_data *policy) { cpufreq_verify_within_cpu_limits(policy); return 0; diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 5096c0ab781b..0ac265d47ef0 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -87,7 +87,7 @@ static int sh_cpufreq_target(struct cpufreq_policy *policy, return work_on_cpu(policy->cpu, __sh_cpufreq_target, &data); } -static int sh_cpufreq_verify(struct cpufreq_policy *policy) +static int sh_cpufreq_verify(struct cpufreq_policy_data *policy) { struct clk *cpuclk = &per_cpu(sh_cpuclk, policy->cpu); struct cpufreq_frequency_table *freq_table; diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c index 707dbc1b7ac8..98d392196df2 100644 --- a/drivers/cpufreq/unicore2-cpufreq.c +++ b/drivers/cpufreq/unicore2-cpufreq.c @@ -22,7 +22,7 @@ static struct cpufreq_driver ucv2_driver; /* make sure that only the "userspace" governor is run * -- anything else wouldn't make sense on this platform, anyway. 
*/ -static int ucv2_verify_speed(struct cpufreq_policy *policy) +static int ucv2_verify_speed(struct cpufreq_policy_data *policy) { if (policy->cpu) return -EINVAL; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 91eb768d4221..0a73bebd04e5 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -716,7 +716,7 @@ source "drivers/crypto/stm32/Kconfig" config CRYPTO_DEV_SAFEXCEL tristate "Inside Secure's SafeXcel cryptographic engine driver" - depends on OF || PCI || COMPILE_TEST + depends on (OF || PCI || COMPILE_TEST) && HAS_IOMEM select CRYPTO_LIB_AES select CRYPTO_AUTHENC select CRYPTO_SKCIPHER diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index 73a7649f915d..9e8b8dede920 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -624,7 +624,7 @@ static int sun8i_ce_probe(struct platform_device *pdev) error_irq: sun8i_ce_pm_exit(ce); error_pm: - sun8i_ce_free_chanlist(ce, MAXFLOW); + sun8i_ce_free_chanlist(ce, MAXFLOW - 1); return err; } @@ -638,7 +638,7 @@ static int sun8i_ce_remove(struct platform_device *pdev) debugfs_remove_recursive(ce->dbgfs_dir); #endif - sun8i_ce_free_chanlist(ce, MAXFLOW); + sun8i_ce_free_chanlist(ce, MAXFLOW - 1); sun8i_ce_pm_exit(ce); return 0; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 90997cc509b8..6b301afffd11 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -595,7 +595,7 @@ static int sun8i_ss_probe(struct platform_device *pdev) error_irq: sun8i_ss_pm_exit(ss); error_pm: - sun8i_ss_free_flows(ss, MAXFLOW); + sun8i_ss_free_flows(ss, MAXFLOW - 1); return err; } @@ -609,7 +609,7 @@ static int sun8i_ss_remove(struct platform_device *pdev) debugfs_remove_recursive(ss->dbgfs_dir); #endif - sun8i_ss_free_flows(ss, MAXFLOW); + sun8i_ss_free_flows(ss, MAXFLOW - 1); sun8i_ss_pm_exit(ss); diff --git a/drivers/crypto/amlogic/Kconfig b/drivers/crypto/amlogic/Kconfig index b90850d18965..cf9547602670 100644 --- a/drivers/crypto/amlogic/Kconfig +++ b/drivers/crypto/amlogic/Kconfig @@ -1,5 +1,6 @@ config CRYPTO_DEV_AMLOGIC_GXL tristate "Support for amlogic cryptographic offloader" + depends on HAS_IOMEM default y if ARCH_MESON select CRYPTO_SKCIPHER select CRYPTO_ENGINE diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c index fa05fce1c0de..9d4ead2f7ebb 100644 --- a/drivers/crypto/amlogic/amlogic-gxl-core.c +++ b/drivers/crypto/amlogic/amlogic-gxl-core.c @@ -289,7 +289,7 @@ static int meson_crypto_probe(struct platform_device *pdev) error_alg: meson_unregister_algs(mc); error_flow: - meson_free_chanlist(mc, MAXFLOW); + meson_free_chanlist(mc, MAXFLOW - 1); clk_disable_unprepare(mc->busclk); return err; } @@ -304,7 +304,7 @@ static int meson_crypto_remove(struct platform_device *pdev) meson_unregister_algs(mc); - meson_free_chanlist(mc, MAXFLOW); + meson_free_chanlist(mc, MAXFLOW - 1); clk_disable_unprepare(mc->busclk); return 0; diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 91092504bc96..b4dee726b253 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -89,7 +89,6 @@ struct atmel_aes_caps { bool has_dualbuff; bool has_cfb64; - bool has_ctr32; bool has_gcm; bool has_xts; bool has_authenc; @@ -122,6 +121,7 @@ struct atmel_aes_ctr_ctx { size_t offset; struct scatterlist 
src[2]; struct scatterlist dst[2]; + u32 blocks; }; struct atmel_aes_gcm_ctx { @@ -514,8 +514,37 @@ static void atmel_aes_set_iv_as_last_ciphertext_block(struct atmel_aes_dev *dd) } } +static inline struct atmel_aes_ctr_ctx * +atmel_aes_ctr_ctx_cast(struct atmel_aes_base_ctx *ctx) +{ + return container_of(ctx, struct atmel_aes_ctr_ctx, base); +} + +static void atmel_aes_ctr_update_req_iv(struct atmel_aes_dev *dd) +{ + struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx); + struct skcipher_request *req = skcipher_request_cast(dd->areq); + struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); + unsigned int ivsize = crypto_skcipher_ivsize(skcipher); + int i; + + /* + * The CTR transfer works in fragments of data of maximum 1 MByte + * because of the 16 bit CTR counter embedded in the IP. When reaching + * here, ctx->blocks contains the number of blocks of the last fragment + * processed, there is no need to explicitly cast it to u16. + */ + for (i = 0; i < ctx->blocks; i++) + crypto_inc((u8 *)ctx->iv, AES_BLOCK_SIZE); + + memcpy(req->iv, ctx->iv, ivsize); +} + static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) { + struct skcipher_request *req = skcipher_request_cast(dd->areq); + struct atmel_aes_reqctx *rctx = skcipher_request_ctx(req); + #if IS_ENABLED(CONFIG_CRYPTO_DEV_ATMEL_AUTHENC) if (dd->ctx->is_aead) atmel_aes_authenc_complete(dd, err); @@ -524,8 +553,13 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) clk_disable(dd->iclk); dd->flags &= ~AES_FLAGS_BUSY; - if (!dd->ctx->is_aead) - atmel_aes_set_iv_as_last_ciphertext_block(dd); + if (!dd->ctx->is_aead && + (rctx->mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB) { + if ((rctx->mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_CTR) + atmel_aes_set_iv_as_last_ciphertext_block(dd); + else + atmel_aes_ctr_update_req_iv(dd); + } if (dd->is_async) dd->areq->complete(dd->areq, err); @@ -1004,19 +1038,14 @@ static int atmel_aes_start(struct atmel_aes_dev *dd) atmel_aes_transfer_complete); } -static inline struct atmel_aes_ctr_ctx * -atmel_aes_ctr_ctx_cast(struct atmel_aes_base_ctx *ctx) -{ - return container_of(ctx, struct atmel_aes_ctr_ctx, base); -} - static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) { struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx); struct skcipher_request *req = skcipher_request_cast(dd->areq); struct scatterlist *src, *dst; - u32 ctr, blocks; size_t datalen; + u32 ctr; + u16 start, end; bool use_dma, fragmented = false; /* Check for transfer completion. */ @@ -1026,29 +1055,19 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) /* Compute data length. */ datalen = req->cryptlen - ctx->offset; - blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); + ctx->blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); ctr = be32_to_cpu(ctx->iv[3]); - if (dd->caps.has_ctr32) { - /* Check 32bit counter overflow. */ - u32 start = ctr; - u32 end = start + blocks - 1; - - if (end < start) { - ctr |= 0xffffffff; - datalen = AES_BLOCK_SIZE * -start; - fragmented = true; - } - } else { - /* Check 16bit counter overflow. */ - u16 start = ctr & 0xffff; - u16 end = start + (u16)blocks - 1; - - if (blocks >> 16 || end < start) { - ctr |= 0xffff; - datalen = AES_BLOCK_SIZE * (0x10000-start); - fragmented = true; - } - }
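
With has_ctr32 gone, every hardware revision now fragments CTR transfers at the IP's 16-bit counter boundary. A worked instance of the check that follows, as a standalone sketch: starting at counter 0xfffe with five blocks left, end truncates to 0x0002 in a u16, so end < start and the fragment is capped at the two blocks that fit before the wrap, leaving three for the next fragment.

#include <crypto/aes.h>		/* AES_BLOCK_SIZE */
#include <linux/types.h>

static size_t example_ctr_fragment(void)
{
	u32 ctr = 0x0011fffe;		/* low 16 bits: start = 0xfffe */
	u32 blocks = 5;
	u16 start = ctr & 0xffff;
	u16 end = start + blocks - 1;	/* 0x10002 truncates to 0x0002 */
	size_t datalen = blocks * AES_BLOCK_SIZE;

	if (blocks >> 16 || end < start) {
		/* only 0x10000 - 0xfffe = 2 blocks fit before the wrap */
		datalen = AES_BLOCK_SIZE * (0x10000 - start);
	}

	return datalen;	/* 32 bytes here; the remaining 3 blocks follow */
}

+ + /* Check 16bit counter overflow. 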
*/ + start = ctr & 0xffff; + end = start + ctx->blocks - 1; + + if (ctx->blocks >> 16 || end < start) { + ctr |= 0xffff; + datalen = AES_BLOCK_SIZE * (0x10000 - start); + fragmented = true; } + use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD); /* Jump to offset. */ @@ -1131,7 +1150,8 @@ static int atmel_aes_crypt(struct skcipher_request *req, unsigned long mode) rctx = skcipher_request_ctx(req); rctx->mode = mode; - if (!(mode & AES_FLAGS_ENCRYPT) && (req->src == req->dst)) { + if ((mode & AES_FLAGS_OPMODE_MASK) != AES_FLAGS_ECB && + !(mode & AES_FLAGS_ENCRYPT) && req->src == req->dst) { unsigned int ivsize = crypto_skcipher_ivsize(skcipher); if (req->cryptlen >= ivsize) @@ -2533,7 +2553,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) { dd->caps.has_dualbuff = 0; dd->caps.has_cfb64 = 0; - dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; dd->caps.has_xts = 0; dd->caps.has_authenc = 0; @@ -2544,7 +2563,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x500: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.has_xts = 1; dd->caps.has_authenc = 1; @@ -2553,7 +2571,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x200: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.max_burst_size = 4; break; diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 8ea0e4bcde0d..7394671f815b 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -1918,12 +1918,7 @@ static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, { struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); - if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) { - crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return 0; + return atmel_sha_hmac_key_set(&hmac->hkey, key, keylen); } static int atmel_sha_hmac_init(struct ahash_request *req) diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c index 0c1f79b30fc1..fde34846b017 100644 --- a/drivers/crypto/atmel-tdes.c +++ b/drivers/crypto/atmel-tdes.c @@ -41,20 +41,23 @@ #include "atmel-tdes-regs.h" /* TDES flags */ -#define TDES_FLAGS_MODE_MASK 0x00ff -#define TDES_FLAGS_ENCRYPT BIT(0) -#define TDES_FLAGS_CBC BIT(1) -#define TDES_FLAGS_CFB BIT(2) -#define TDES_FLAGS_CFB8 BIT(3) -#define TDES_FLAGS_CFB16 BIT(4) -#define TDES_FLAGS_CFB32 BIT(5) -#define TDES_FLAGS_CFB64 BIT(6) -#define TDES_FLAGS_OFB BIT(7) - -#define TDES_FLAGS_INIT BIT(16) -#define TDES_FLAGS_FAST BIT(17) -#define TDES_FLAGS_BUSY BIT(18) -#define TDES_FLAGS_DMA BIT(19) +/* Reserve bits [17:16], [13:12], [2:0] for AES Mode Register */ +#define TDES_FLAGS_ENCRYPT TDES_MR_CYPHER_ENC +#define TDES_FLAGS_OPMODE_MASK (TDES_MR_OPMOD_MASK | TDES_MR_CFBS_MASK) +#define TDES_FLAGS_ECB TDES_MR_OPMOD_ECB +#define TDES_FLAGS_CBC TDES_MR_OPMOD_CBC +#define TDES_FLAGS_OFB TDES_MR_OPMOD_OFB +#define TDES_FLAGS_CFB64 (TDES_MR_OPMOD_CFB | TDES_MR_CFBS_64b) +#define TDES_FLAGS_CFB32 (TDES_MR_OPMOD_CFB | TDES_MR_CFBS_32b) +#define TDES_FLAGS_CFB16 (TDES_MR_OPMOD_CFB | TDES_MR_CFBS_16b) +#define TDES_FLAGS_CFB8 (TDES_MR_OPMOD_CFB | TDES_MR_CFBS_8b) + +#define TDES_FLAGS_MODE_MASK (TDES_FLAGS_OPMODE_MASK | TDES_FLAGS_ENCRYPT) + +#define TDES_FLAGS_INIT BIT(3) +#define TDES_FLAGS_FAST BIT(4) +#define TDES_FLAGS_BUSY BIT(5) +#define TDES_FLAGS_DMA BIT(6) #define ATMEL_TDES_QUEUE_LENGTH 50 @@ -282,25 +285,7 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) valmr |= 
TDES_MR_TDESMOD_DES; } - if (dd->flags & TDES_FLAGS_CBC) { - valmr |= TDES_MR_OPMOD_CBC; - } else if (dd->flags & TDES_FLAGS_CFB) { - valmr |= TDES_MR_OPMOD_CFB; - - if (dd->flags & TDES_FLAGS_CFB8) - valmr |= TDES_MR_CFBS_8b; - else if (dd->flags & TDES_FLAGS_CFB16) - valmr |= TDES_MR_CFBS_16b; - else if (dd->flags & TDES_FLAGS_CFB32) - valmr |= TDES_MR_CFBS_32b; - else if (dd->flags & TDES_FLAGS_CFB64) - valmr |= TDES_MR_CFBS_64b; - } else if (dd->flags & TDES_FLAGS_OFB) { - valmr |= TDES_MR_OPMOD_OFB; - } - - if ((dd->flags & TDES_FLAGS_ENCRYPT) || (dd->flags & TDES_FLAGS_OFB)) - valmr |= TDES_MR_CYPHER_ENC; + valmr |= dd->flags & TDES_FLAGS_MODE_MASK; atmel_tdes_write(dd, TDES_CR, valcr); atmel_tdes_write(dd, TDES_MR, valmr); @@ -308,10 +293,8 @@ static int atmel_tdes_write_ctrl(struct atmel_tdes_dev *dd) atmel_tdes_write_n(dd, TDES_KEY1W1R, dd->ctx->key, dd->ctx->keylen >> 2); - if (((dd->flags & TDES_FLAGS_CBC) || (dd->flags & TDES_FLAGS_CFB) || - (dd->flags & TDES_FLAGS_OFB)) && dd->req->iv) { + if (dd->req->iv && (valmr & TDES_MR_OPMOD_MASK) != TDES_MR_OPMOD_ECB) atmel_tdes_write_n(dd, TDES_IV1R, (void *)dd->req->iv, 2); - } return 0; } @@ -402,6 +385,7 @@ static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, { struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); struct atmel_tdes_dev *dd = ctx->dd; + struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(dd->req); int len32; dd->dma_size = length; @@ -411,12 +395,19 @@ static int atmel_tdes_crypt_pdc(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, DMA_TO_DEVICE); } - if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB8)) + switch (rctx->mode & TDES_FLAGS_OPMODE_MASK) { + case TDES_FLAGS_CFB8: len32 = DIV_ROUND_UP(length, sizeof(u8)); - else if ((dd->flags & TDES_FLAGS_CFB) && (dd->flags & TDES_FLAGS_CFB16)) + break; + + case TDES_FLAGS_CFB16: len32 = DIV_ROUND_UP(length, sizeof(u16)); - else + break; + + default: len32 = DIV_ROUND_UP(length, sizeof(u32)); + break; + } atmel_tdes_write(dd, TDES_PTCR, TDES_PTCR_TXTDIS|TDES_PTCR_RXTDIS); atmel_tdes_write(dd, TDES_TPR, dma_addr_in); @@ -438,8 +429,10 @@ static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, { struct atmel_tdes_ctx *ctx = crypto_tfm_ctx(tfm); struct atmel_tdes_dev *dd = ctx->dd; + struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(dd->req); struct scatterlist sg[2]; struct dma_async_tx_descriptor *in_desc, *out_desc; + enum dma_slave_buswidth addr_width; dd->dma_size = length; @@ -448,23 +441,23 @@ static int atmel_tdes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, DMA_TO_DEVICE); } - if (dd->flags & TDES_FLAGS_CFB8) { - dd->dma_lch_in.dma_conf.dst_addr_width = - DMA_SLAVE_BUSWIDTH_1_BYTE; - dd->dma_lch_out.dma_conf.src_addr_width = - DMA_SLAVE_BUSWIDTH_1_BYTE; - } else if (dd->flags & TDES_FLAGS_CFB16) { - dd->dma_lch_in.dma_conf.dst_addr_width = - DMA_SLAVE_BUSWIDTH_2_BYTES; - dd->dma_lch_out.dma_conf.src_addr_width = - DMA_SLAVE_BUSWIDTH_2_BYTES; - } else { - dd->dma_lch_in.dma_conf.dst_addr_width = - DMA_SLAVE_BUSWIDTH_4_BYTES; - dd->dma_lch_out.dma_conf.src_addr_width = - DMA_SLAVE_BUSWIDTH_4_BYTES; + switch (rctx->mode & TDES_FLAGS_OPMODE_MASK) { + case TDES_FLAGS_CFB8: + addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE; + break; + + case TDES_FLAGS_CFB16: + addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; + break; + + default: + addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + break; } + dd->dma_lch_in.dma_conf.dst_addr_width = addr_width; + dd->dma_lch_out.dma_conf.src_addr_width = addr_width; + 
dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf); dmaengine_slave_config(dd->dma_lch_out.chan, &dd->dma_lch_out.dma_conf); @@ -600,12 +593,14 @@ atmel_tdes_set_iv_as_last_ciphertext_block(struct atmel_tdes_dev *dd) static void atmel_tdes_finish_req(struct atmel_tdes_dev *dd, int err) { struct skcipher_request *req = dd->req; + struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req); clk_disable_unprepare(dd->iclk); dd->flags &= ~TDES_FLAGS_BUSY; - atmel_tdes_set_iv_as_last_ciphertext_block(dd); + if ((rctx->mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB) + atmel_tdes_set_iv_as_last_ciphertext_block(dd); req->base.complete(&req->base, err); } @@ -699,35 +694,44 @@ static int atmel_tdes_crypt(struct skcipher_request *req, unsigned long mode) struct atmel_tdes_ctx *ctx = crypto_skcipher_ctx(skcipher); struct atmel_tdes_reqctx *rctx = skcipher_request_ctx(req); - if (mode & TDES_FLAGS_CFB8) { + switch (mode & TDES_FLAGS_OPMODE_MASK) { + case TDES_FLAGS_CFB8: if (!IS_ALIGNED(req->cryptlen, CFB8_BLOCK_SIZE)) { pr_err("request size is not exact amount of CFB8 blocks\n"); return -EINVAL; } ctx->block_size = CFB8_BLOCK_SIZE; - } else if (mode & TDES_FLAGS_CFB16) { + break; + + case TDES_FLAGS_CFB16: if (!IS_ALIGNED(req->cryptlen, CFB16_BLOCK_SIZE)) { pr_err("request size is not exact amount of CFB16 blocks\n"); return -EINVAL; } ctx->block_size = CFB16_BLOCK_SIZE; - } else if (mode & TDES_FLAGS_CFB32) { + break; + + case TDES_FLAGS_CFB32: if (!IS_ALIGNED(req->cryptlen, CFB32_BLOCK_SIZE)) { pr_err("request size is not exact amount of CFB32 blocks\n"); return -EINVAL; } ctx->block_size = CFB32_BLOCK_SIZE; - } else { + break; + + default: if (!IS_ALIGNED(req->cryptlen, DES_BLOCK_SIZE)) { pr_err("request size is not exact amount of DES blocks\n"); return -EINVAL; } ctx->block_size = DES_BLOCK_SIZE; + break; } rctx->mode = mode; - if (!(mode & TDES_FLAGS_ENCRYPT) && req->src == req->dst) { + if ((mode & TDES_FLAGS_OPMODE_MASK) != TDES_FLAGS_ECB && + !(mode & TDES_FLAGS_ENCRYPT) && req->src == req->dst) { unsigned int ivsize = crypto_skcipher_ivsize(skcipher); if (req->cryptlen >= ivsize) @@ -841,17 +845,17 @@ static int atmel_tdes_setkey(struct crypto_skcipher *tfm, const u8 *key, static int atmel_tdes_ecb_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT); + return atmel_tdes_crypt(req, TDES_FLAGS_ECB | TDES_FLAGS_ENCRYPT); } static int atmel_tdes_ecb_decrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, 0); + return atmel_tdes_crypt(req, TDES_FLAGS_ECB); } static int atmel_tdes_cbc_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CBC); + return atmel_tdes_crypt(req, TDES_FLAGS_CBC | TDES_FLAGS_ENCRYPT); } static int atmel_tdes_cbc_decrypt(struct skcipher_request *req) @@ -860,50 +864,47 @@ static int atmel_tdes_cbc_decrypt(struct skcipher_request *req) } static int atmel_tdes_cfb_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB64 | TDES_FLAGS_ENCRYPT); } static int atmel_tdes_cfb_decrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_CFB); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB64); } static int atmel_tdes_cfb8_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB | - TDES_FLAGS_CFB8); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB8 | TDES_FLAGS_ENCRYPT); } static int 
atmel_tdes_cfb8_decrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB8); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB8); } static int atmel_tdes_cfb16_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB | - TDES_FLAGS_CFB16); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB16 | TDES_FLAGS_ENCRYPT); } static int atmel_tdes_cfb16_decrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB16); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB16); } static int atmel_tdes_cfb32_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_CFB | - TDES_FLAGS_CFB32); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB32 | TDES_FLAGS_ENCRYPT); } static int atmel_tdes_cfb32_decrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_CFB | TDES_FLAGS_CFB32); + return atmel_tdes_crypt(req, TDES_FLAGS_CFB32); } static int atmel_tdes_ofb_encrypt(struct skcipher_request *req) { - return atmel_tdes_crypt(req, TDES_FLAGS_ENCRYPT | TDES_FLAGS_OFB); + return atmel_tdes_crypt(req, TDES_FLAGS_OFB | TDES_FLAGS_ENCRYPT); } static int atmel_tdes_ofb_decrypt(struct skcipher_request *req) diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 4b20606983a4..22ebe40f09f5 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -1251,7 +1251,7 @@ static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key, if (len != 16 && len != 24 && len != 32) { crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -1; + return -EINVAL; } ctx->key_length = len; diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 3443f6d6dd83..6863d7097674 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -2481,7 +2481,7 @@ static struct caam_aead_alg driver_aeads[] = { .cra_name = "echainiv(authenc(hmac(sha256)," "cbc(des)))", .cra_driver_name = "echainiv-authenc-" - "hmac-sha256-cbc-desi-" + "hmac-sha256-cbc-des-" "caam-qi2", .cra_blocksize = DES_BLOCK_SIZE, }, diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index d7c3c3805693..3e811fcc6b83 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -671,11 +671,9 @@ static int caam_probe(struct platform_device *pdev) of_node_put(np); if (!ctrlpriv->mc_en) - clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK | MCFGR_LONG_PTR, + clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF | - MCFGR_WDENABLE | MCFGR_LARGE_BURST | - (sizeof(dma_addr_t) == sizeof(u64) ? - MCFGR_LONG_PTR : 0)); + MCFGR_WDENABLE | MCFGR_LARGE_BURST); handle_imx6_err005766(&ctrl->mcr); diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 0186b3df4c87..0d5576f6ad21 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -586,6 +586,7 @@ const struct ccp_vdata ccpv3_platform = { .setup = NULL, .perform = &ccp3_actions, .offset = 0, + .rsamax = CCP_RSA_MAX_WIDTH, }; const struct ccp_vdata ccpv3 = { diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 64d318dc0d47..22d3e0e8b68c 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -237,7 +237,7 @@ static void cc_aead_complete(struct device *dev, void *cc_req, int err) * revealed the decrypted message --> zero its memory. 
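
The one-word change in this cc_aead.c hunk hinges on sg_zero_buffer()'s last parameter being a skip offset: an AEAD destination scatterlist is laid out as the associated data followed by the text, so a skip of 0 wiped the start of the buffer and left the tail of the decrypted message in memory. The corrected call in isolation (sketch):

#include <crypto/aead.h>
#include <linux/scatterlist.h>

static void example_wipe_rejected_plaintext(struct aead_request *areq)
{
	/*
	 * dst layout after decryption: [ assoc data | decrypted text ].
	 * Skip the associated data and zero exactly the decrypted bytes.
	 */
	sg_zero_buffer(areq->dst, sg_nents(areq->dst),
		       areq->cryptlen, areq->assoclen);
}
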
*/ sg_zero_buffer(areq->dst, sg_nents(areq->dst), - areq->cryptlen, 0); + areq->cryptlen, areq->assoclen); err = -EBADMSG; } /*ENCRYPT*/ diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index 3112b58d0bb1..56c9a8f26435 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -523,6 +523,7 @@ static void cc_setup_readiv_desc(struct crypto_tfm *tfm, } } + static void cc_setup_state_desc(struct crypto_tfm *tfm, struct cipher_req_ctx *req_ctx, unsigned int ivsize, unsigned int nbytes, @@ -534,8 +535,6 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, int cipher_mode = ctx_p->cipher_mode; int flow_mode = ctx_p->flow_mode; int direction = req_ctx->gen_ctx.op_type; - dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr; - unsigned int key_len = ctx_p->keylen; dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; unsigned int du_size = nbytes; @@ -570,6 +569,47 @@ static void cc_setup_state_desc(struct crypto_tfm *tfm, break; case DRV_CIPHER_XTS: case DRV_CIPHER_ESSIV: + case DRV_CIPHER_BITLOCKER: + break; + default: + dev_err(dev, "Unsupported cipher mode (%d)\n", cipher_mode); + } +} + + +static void cc_setup_xex_state_desc(struct crypto_tfm *tfm, + struct cipher_req_ctx *req_ctx, + unsigned int ivsize, unsigned int nbytes, + struct cc_hw_desc desc[], + unsigned int *seq_size) +{ + struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm); + struct device *dev = drvdata_to_dev(ctx_p->drvdata); + int cipher_mode = ctx_p->cipher_mode; + int flow_mode = ctx_p->flow_mode; + int direction = req_ctx->gen_ctx.op_type; + dma_addr_t key_dma_addr = ctx_p->user.key_dma_addr; + unsigned int key_len = ctx_p->keylen; + dma_addr_t iv_dma_addr = req_ctx->gen_ctx.iv_dma_addr; + unsigned int du_size = nbytes; + + struct cc_crypto_alg *cc_alg = + container_of(tfm->__crt_alg, struct cc_crypto_alg, + skcipher_alg.base); + + if (cc_alg->data_unit) + du_size = cc_alg->data_unit; + + switch (cipher_mode) { + case DRV_CIPHER_ECB: + break; + case DRV_CIPHER_CBC: + case DRV_CIPHER_CBC_CTS: + case DRV_CIPHER_CTR: + case DRV_CIPHER_OFB: + break; + case DRV_CIPHER_XTS: + case DRV_CIPHER_ESSIV: case DRV_CIPHER_BITLOCKER: /* load XEX key */ hw_desc_init(&desc[*seq_size]); @@ -881,12 +921,14 @@ static int cc_cipher_process(struct skcipher_request *req, /* STAT_PHASE_2: Create sequence */ - /* Setup IV and XEX key used */ + /* Setup state (IV) */ cc_setup_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len); /* Setup MLLI line, if needed */ cc_setup_mlli_desc(tfm, req_ctx, dst, src, nbytes, req, desc, &seq_len); /* Setup key */ cc_setup_key_desc(tfm, req_ctx, nbytes, desc, &seq_len); + /* Setup state (IV and XEX key) */ + cc_setup_xex_state_desc(tfm, req_ctx, ivsize, nbytes, desc, &seq_len); /* Data processing */ cc_setup_flow_desc(tfm, req_ctx, dst, src, nbytes, desc, &seq_len); /* Read next IV */ diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h index ab31d4a68c80..7d2f7e2c0bb5 100644 --- a/drivers/crypto/ccree/cc_driver.h +++ b/drivers/crypto/ccree/cc_driver.h @@ -161,6 +161,7 @@ struct cc_drvdata { int std_bodies; bool sec_disabled; u32 comp_mask; + bool pm_on; }; struct cc_crypto_alg { diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index dbc508fb719b..452bd77a9ba0 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -22,14 +22,8 @@ const struct dev_pm_ops ccree_pm = { int cc_pm_suspend(struct device *dev) { struct cc_drvdata *drvdata = dev_get_drvdata(dev); - int rc; 
dev_dbg(dev, "set HOST_POWER_DOWN_EN\n"); - rc = cc_suspend_req_queue(drvdata); - if (rc) { - dev_err(dev, "cc_suspend_req_queue (%x)\n", rc); - return rc; - } fini_cc_regs(drvdata); cc_iowrite(drvdata, CC_REG(HOST_POWER_DOWN_EN), POWER_DOWN_ENABLE); cc_clk_off(drvdata); @@ -63,13 +57,6 @@ int cc_pm_resume(struct device *dev) /* check if tee fips error occurred during power down */ cc_tee_handle_fips_error(drvdata); - rc = cc_resume_req_queue(drvdata); - if (rc) { - dev_err(dev, "cc_resume_req_queue (%x)\n", rc); - return rc; - } - - /* must be after the queue resuming as it uses the HW queue*/ cc_init_hash_sram(drvdata); return 0; @@ -80,12 +67,10 @@ int cc_pm_get(struct device *dev) int rc = 0; struct cc_drvdata *drvdata = dev_get_drvdata(dev); - if (cc_req_queue_suspended(drvdata)) + if (drvdata->pm_on) rc = pm_runtime_get_sync(dev); - else - pm_runtime_get_noresume(dev); - return rc; + return (rc == 1 ? 0 : rc); } int cc_pm_put_suspend(struct device *dev) @@ -93,14 +78,11 @@ int cc_pm_put_suspend(struct device *dev) int rc = 0; struct cc_drvdata *drvdata = dev_get_drvdata(dev); - if (!cc_req_queue_suspended(drvdata)) { + if (drvdata->pm_on) { pm_runtime_mark_last_busy(dev); rc = pm_runtime_put_autosuspend(dev); - } else { - /* Something wrong happens*/ - dev_err(dev, "request to suspend already suspended queue"); - rc = -EBUSY; } + return rc; } @@ -117,7 +99,7 @@ int cc_pm_init(struct cc_drvdata *drvdata) /* must be before the enabling to avoid redundant suspending */ pm_runtime_set_autosuspend_delay(dev, CC_SUSPEND_TIMEOUT); pm_runtime_use_autosuspend(dev); - /* activate the PM module */ + /* set us as active - note we won't do PM ops until cc_pm_go()! */ return pm_runtime_set_active(dev); } @@ -125,9 +107,11 @@ int cc_pm_init(struct cc_drvdata *drvdata) void cc_pm_go(struct cc_drvdata *drvdata) { pm_runtime_enable(drvdata_to_dev(drvdata)); + drvdata->pm_on = true; } void cc_pm_fini(struct cc_drvdata *drvdata) { pm_runtime_disable(drvdata_to_dev(drvdata)); + drvdata->pm_on = false; }
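
The (rc == 1 ? 0 : rc) in cc_pm_get() above normalizes a runtime-PM quirk: pm_runtime_get_sync() returns 1 rather than 0 when the device was already active, and propagating that raw return value turns a success into a spurious error. The pattern in isolation (sketch; the put_noidle on failure is common practice, not part of this patch):

#include <linux/pm_runtime.h>

static int example_pm_get(struct device *dev)
{
	int rc = pm_runtime_get_sync(dev);

	if (rc < 0) {
		/* the usage count is raised even on failure; drop it */
		pm_runtime_put_noidle(dev);
		return rc;
	}

	return 0;	/* fold "already active" (rc == 1) into success */
}

diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c index a947d5a2cf35..37e6fee37b13 100644 --- a/drivers/crypto/ccree/cc_request_mgr.c +++ b/drivers/crypto/ccree/cc_request_mgr.c @@ -41,7 +41,6 @@ struct cc_req_mgr_handle { #else struct tasklet_struct comptask; #endif - bool is_runtime_suspended; }; struct cc_bl_item { @@ -404,6 +403,7 @@ static void cc_proc_backlog(struct cc_drvdata *drvdata) spin_lock(&mgr->bl_lock); list_del(&bli->list); --mgr->bl_len; + kfree(bli); } spin_unlock(&mgr->bl_lock); @@ -677,52 +677,3 @@ static void comp_handler(unsigned long devarg) cc_proc_backlog(drvdata); dev_dbg(dev, "Comp. handler done.\n"); } - -/* - * resume the queue configuration - no need to take the lock as this happens - * inside the spin lock protection - */ -#if defined(CONFIG_PM) -int cc_resume_req_queue(struct cc_drvdata *drvdata) -{ - struct cc_req_mgr_handle *request_mgr_handle = - drvdata->request_mgr_handle; - - spin_lock_bh(&request_mgr_handle->hw_lock); - request_mgr_handle->is_runtime_suspended = false; - spin_unlock_bh(&request_mgr_handle->hw_lock); - - return 0; -} - -/* - * suspend the queue configuration. Since it is used for the runtime suspend - * only verify that the queue can be suspended. 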
- */ -int cc_suspend_req_queue(struct cc_drvdata *drvdata) -{ - struct cc_req_mgr_handle *request_mgr_handle = - drvdata->request_mgr_handle; - - /* lock the send_request */ - spin_lock_bh(&request_mgr_handle->hw_lock); - if (request_mgr_handle->req_queue_head != - request_mgr_handle->req_queue_tail) { - spin_unlock_bh(&request_mgr_handle->hw_lock); - return -EBUSY; - } - request_mgr_handle->is_runtime_suspended = true; - spin_unlock_bh(&request_mgr_handle->hw_lock); - - return 0; -} - -bool cc_req_queue_suspended(struct cc_drvdata *drvdata) -{ - struct cc_req_mgr_handle *request_mgr_handle = - drvdata->request_mgr_handle; - - return request_mgr_handle->is_runtime_suspended; -} - -#endif diff --git a/drivers/crypto/ccree/cc_request_mgr.h b/drivers/crypto/ccree/cc_request_mgr.h index f46cf766fe4d..ff7746aaaf35 100644 --- a/drivers/crypto/ccree/cc_request_mgr.h +++ b/drivers/crypto/ccree/cc_request_mgr.h @@ -40,12 +40,4 @@ void complete_request(struct cc_drvdata *drvdata); void cc_req_mgr_fini(struct cc_drvdata *drvdata); -#if defined(CONFIG_PM) -int cc_resume_req_queue(struct cc_drvdata *drvdata); - -int cc_suspend_req_queue(struct cc_drvdata *drvdata); - -bool cc_req_queue_suspended(struct cc_drvdata *drvdata); -#endif - #endif /*__REQUEST_MGR_H__*/ diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 1b4a5664e604..d483eed030ee 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -3195,9 +3195,6 @@ static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) aeadctx->mayverify = VERIFY_SW; break; default: - - crypto_tfm_set_flags((struct crypto_tfm *) tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -3222,8 +3219,6 @@ static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -3264,8 +3259,6 @@ static int chcr_ccm_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -3290,8 +3283,7 @@ static int chcr_ccm_common_setkey(struct crypto_aead *aead, ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -3329,8 +3321,7 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key, int error; if (keylen < 3) { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -3380,8 +3371,7 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, } else if (keylen == AES_KEYSIZE_256) { ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); pr_err("GCM: Invalid key length %d\n", keylen); ret = -EINVAL; goto out; diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c index 
aca75237bbcf..dffa2aa855fd 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.c +++ b/drivers/crypto/chelsio/chtls/chtls_cm.c @@ -727,6 +727,14 @@ static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb) return 0; } +static void chtls_purge_wr_queue(struct sock *sk) +{ + struct sk_buff *skb; + + while ((skb = dequeue_wr(sk)) != NULL) + kfree_skb(skb); +} + static void chtls_release_resources(struct sock *sk) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); @@ -741,6 +749,11 @@ static void chtls_release_resources(struct sock *sk) kfree_skb(csk->txdata_skb_cache); csk->txdata_skb_cache = NULL; + if (csk->wr_credits != csk->wr_max_credits) { + chtls_purge_wr_queue(sk); + chtls_reset_wr_list(csk); + } + if (csk->l2t_entry) { cxgb4_l2t_release(csk->l2t_entry); csk->l2t_entry = NULL; @@ -1735,6 +1748,7 @@ static void chtls_peer_close(struct sock *sk, struct sk_buff *skb) else sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); } + kfree_skb(skb); } static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb) @@ -2062,19 +2076,6 @@ static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb) return 0; } -static struct sk_buff *dequeue_wr(struct sock *sk) -{ - struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); - struct sk_buff *skb = csk->wr_skb_head; - - if (likely(skb)) { - /* Don't bother clearing the tail */ - csk->wr_skb_head = WR_SKB_CB(skb)->next_wr; - WR_SKB_CB(skb)->next_wr = NULL; - } - return skb; -} - static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb) { struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR; diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.h b/drivers/crypto/chelsio/chtls/chtls_cm.h index 129d7ac649a9..3fac0c74a41f 100644 --- a/drivers/crypto/chelsio/chtls/chtls_cm.h +++ b/drivers/crypto/chelsio/chtls/chtls_cm.h @@ -185,6 +185,12 @@ static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } +static inline void chtls_reset_wr_list(struct chtls_sock *csk) +{ + csk->wr_skb_head = NULL; + csk->wr_skb_tail = NULL; +} + static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb) { WR_SKB_CB(skb)->next_wr = NULL; @@ -197,4 +203,19 @@ static inline void enqueue_wr(struct chtls_sock *csk, struct sk_buff *skb) WR_SKB_CB(csk->wr_skb_tail)->next_wr = skb; csk->wr_skb_tail = skb; } + +static inline struct sk_buff *dequeue_wr(struct sock *sk) +{ + struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); + struct sk_buff *skb = NULL; + + skb = csk->wr_skb_head; + + if (likely(skb)) { + /* Don't bother clearing the tail */ + csk->wr_skb_head = WR_SKB_CB(skb)->next_wr; + WR_SKB_CB(skb)->next_wr = NULL; + } + return skb; +} #endif diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c index 2a34035d3cfb..a217fe72602d 100644 --- a/drivers/crypto/chelsio/chtls/chtls_hw.c +++ b/drivers/crypto/chelsio/chtls/chtls_hw.c @@ -350,6 +350,7 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname) kwr->sc_imm.cmd_more = cpu_to_be32(ULPTX_CMD_V(ULP_TX_SC_IMM)); kwr->sc_imm.len = cpu_to_be32(klen); + lock_sock(sk); /* key info */ kctx = (struct _key_ctx *)(kwr + 1); ret = chtls_key_info(csk, kctx, keylen, optname); @@ -388,8 +389,10 @@ int chtls_setkey(struct chtls_sock *csk, u32 keylen, u32 optname) csk->tlshws.txkey = keyid; } + release_sock(sk); return ret; out_notcb: + release_sock(sk); free_tls_keyid(sk); out_nokey: kfree_skb(skb); diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 
c0e7a85fe129..6e7c7573dc0c 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -16,7 +16,7 @@ config CRYPTO_DEV_HISI_SEC config CRYPTO_DEV_HISI_SEC2 tristate "Support for HiSilicon SEC2 crypto block cipher accelerator" - select CRYPTO_BLKCIPHER + select CRYPTO_SKCIPHER select CRYPTO_ALGAPI select CRYPTO_LIB_DES select CRYPTO_DEV_HISI_QM @@ -44,7 +44,6 @@ config CRYPTO_DEV_HISI_ZIP depends on ARM64 || (COMPILE_TEST && 64BIT) depends on !CPU_BIG_ENDIAN || COMPILE_TEST select CRYPTO_DEV_HISI_QM - select SG_SPLIT help Support for HiSilicon ZIP Driver diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 98f037e6ea3e..d8b015266ee4 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -1043,6 +1043,7 @@ static unsigned int hpre_rsa_max_size(struct crypto_akcipher *tfm) static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm) { struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm); + int ret; ctx->rsa.soft_tfm = crypto_alloc_akcipher("rsa-generic", 0, 0); if (IS_ERR(ctx->rsa.soft_tfm)) { @@ -1050,7 +1051,11 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm) return PTR_ERR(ctx->rsa.soft_tfm); } - return hpre_ctx_init(ctx); + ret = hpre_ctx_init(ctx); + if (ret) + crypto_free_akcipher(ctx->rsa.soft_tfm); + + return ret; } static void hpre_rsa_exit_tfm(struct crypto_akcipher *tfm) diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 34e0424410bf..0c98c37e39f4 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -106,18 +106,18 @@ static const char * const hpre_debug_file_name[] = { }; static const struct hpre_hw_error hpre_hw_errors[] = { - { .int_msk = BIT(0), .msg = "hpre_ecc_1bitt_err" }, - { .int_msk = BIT(1), .msg = "hpre_ecc_2bit_err" }, - { .int_msk = BIT(2), .msg = "hpre_data_wr_err" }, - { .int_msk = BIT(3), .msg = "hpre_data_rd_err" }, - { .int_msk = BIT(4), .msg = "hpre_bd_rd_err" }, - { .int_msk = BIT(5), .msg = "hpre_ooo_2bit_ecc_err" }, - { .int_msk = BIT(6), .msg = "hpre_cltr1_htbt_tm_out_err" }, - { .int_msk = BIT(7), .msg = "hpre_cltr2_htbt_tm_out_err" }, - { .int_msk = BIT(8), .msg = "hpre_cltr3_htbt_tm_out_err" }, - { .int_msk = BIT(9), .msg = "hpre_cltr4_htbt_tm_out_err" }, - { .int_msk = GENMASK(15, 10), .msg = "hpre_ooo_rdrsp_err" }, - { .int_msk = GENMASK(21, 16), .msg = "hpre_ooo_wrrsp_err" }, + { .int_msk = BIT(0), .msg = "core_ecc_1bit_err_int_set" }, + { .int_msk = BIT(1), .msg = "core_ecc_2bit_err_int_set" }, + { .int_msk = BIT(2), .msg = "dat_wb_poison_int_set" }, + { .int_msk = BIT(3), .msg = "dat_rd_poison_int_set" }, + { .int_msk = BIT(4), .msg = "bd_rd_poison_int_set" }, + { .int_msk = BIT(5), .msg = "ooo_ecc_2bit_err_int_set" }, + { .int_msk = BIT(6), .msg = "cluster1_shb_timeout_int_set" }, + { .int_msk = BIT(7), .msg = "cluster2_shb_timeout_int_set" }, + { .int_msk = BIT(8), .msg = "cluster3_shb_timeout_int_set" }, + { .int_msk = BIT(9), .msg = "cluster4_shb_timeout_int_set" }, + { .int_msk = GENMASK(15, 10), .msg = "ooo_rdrsp_err_int_set" }, + { .int_msk = GENMASK(21, 16), .msg = "ooo_wrrsp_err_int_set" }, { /* sentinel */ } }; diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index b846d73d9a85..841f4c56ca73 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -40,7 +40,7 @@ struct sec_req { int req_id; /* Status of the SEC request */ - 
atomic_t fake_busy; + bool fake_busy; }; /** diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 0a5391fff485..2475aaf0d59b 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -141,7 +141,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) return -ENOBUFS; if (!ret) { - if (atomic_read(&req->fake_busy)) + if (req->fake_busy) ret = -EBUSY; else ret = -EINPROGRESS; @@ -641,7 +641,7 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req) if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && req->c_req.encrypt) sec_update_iv(req); - if (atomic_cmpxchg(&req->fake_busy, 1, 0) != 1) + if (req->fake_busy) sk_req->base.complete(&sk_req->base, -EINPROGRESS); sk_req->base.complete(&sk_req->base, req->err_type); @@ -672,9 +672,9 @@ static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req) } if (ctx->fake_req_limit <= atomic_inc_return(&qp_ctx->pending_reqs)) - atomic_set(&req->fake_busy, 1); + req->fake_busy = true; else - atomic_set(&req->fake_busy, 0); + req->fake_busy = false; ret = ctx->req_op->get_res(ctx, req); if (ret) { diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index ab742dfbab99..d40e2da3b05d 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -608,13 +608,13 @@ static const struct file_operations sec_dbg_fops = { .write = sec_debug_write, }; -static int debugfs_atomic64_t_get(void *data, u64 *val) +static int sec_debugfs_atomic64_get(void *data, u64 *val) { - *val = atomic64_read((atomic64_t *)data); - return 0; + *val = atomic64_read((atomic64_t *)data); + return 0; } -DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic64_t_ro, debugfs_atomic64_t_get, NULL, - "%lld\n"); +DEFINE_DEBUGFS_ATTRIBUTE(sec_atomic64_ops, sec_debugfs_atomic64_get, + NULL, "%lld\n"); static int sec_core_debug_init(struct sec_dev *sec) { @@ -636,11 +636,11 @@ static int sec_core_debug_init(struct sec_dev *sec) debugfs_create_regset32("regs", 0444, tmp_d, regset); - debugfs_create_file("send_cnt", 0444, tmp_d, &dfx->send_cnt, - &fops_atomic64_t_ro); + debugfs_create_file("send_cnt", 0444, tmp_d, + &dfx->send_cnt, &sec_atomic64_ops); - debugfs_create_file("recv_cnt", 0444, tmp_d, &dfx->recv_cnt, - &fops_atomic64_t_ro); + debugfs_create_file("recv_cnt", 0444, tmp_d, + &dfx->recv_cnt, &sec_atomic64_ops); return 0; } diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 012023c347b1..1e153a0d7c10 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -202,18 +202,21 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, dma_addr_t curr_sgl_dma = 0; struct acc_hw_sge *curr_hw_sge; struct scatterlist *sg; - int i, ret, sg_n; + int i, sg_n, sg_n_mapped; if (!dev || !sgl || !pool || !hw_sgl_dma) return ERR_PTR(-EINVAL); sg_n = sg_nents(sgl); - if (sg_n > pool->sge_nr) + + sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL); + if (!sg_n_mapped) return ERR_PTR(-EINVAL); - ret = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL); - if (!ret) + if (sg_n_mapped > pool->sge_nr) { + dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL); return ERR_PTR(-EINVAL); + } curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma); if (IS_ERR(curr_hw_sgl)) { @@ -224,7 +227,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, curr_hw_sgl->entry_length_in_sgl = cpu_to_le16(pool->sge_nr); curr_hw_sge = curr_hw_sgl->sge_entries; - for_each_sg(sgl, sg, sg_n, 
i) { + for_each_sg(sgl, sg, sg_n_mapped, i) { sg_map_to_hw_sg(sg, curr_hw_sge); inc_hw_sgl_sge(curr_hw_sgl); curr_hw_sge++; diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index 79fc4dd3fe00..bc1db26598bb 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -11,6 +11,10 @@ /* hisi_zip_sqe dw3 */ #define HZIP_BD_STATUS_M GENMASK(7, 0) +/* hisi_zip_sqe dw7 */ +#define HZIP_IN_SGE_DATA_OFFSET_M GENMASK(23, 0) +/* hisi_zip_sqe dw8 */ +#define HZIP_OUT_SGE_DATA_OFFSET_M GENMASK(23, 0) /* hisi_zip_sqe dw9 */ #define HZIP_REQ_TYPE_M GENMASK(7, 0) #define HZIP_ALG_TYPE_ZLIB 0x02 diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 795428c1d07e..9815d5e3ccd0 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -46,10 +46,8 @@ enum hisi_zip_alg_type { struct hisi_zip_req { struct acomp_req *req; - struct scatterlist *src; - struct scatterlist *dst; - size_t slen; - size_t dlen; + int sskip; + int dskip; struct hisi_acc_hw_sgl *hw_src; struct hisi_acc_hw_sgl *hw_dst; dma_addr_t dma_src; @@ -119,13 +117,15 @@ static void hisi_zip_config_tag(struct hisi_zip_sqe *sqe, u32 tag) static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type, dma_addr_t s_addr, dma_addr_t d_addr, u32 slen, - u32 dlen) + u32 dlen, int sskip, int dskip) { memset(sqe, 0, sizeof(struct hisi_zip_sqe)); - sqe->input_data_length = slen; + sqe->input_data_length = slen - sskip; + sqe->dw7 = FIELD_PREP(HZIP_IN_SGE_DATA_OFFSET_M, sskip); + sqe->dw8 = FIELD_PREP(HZIP_OUT_SGE_DATA_OFFSET_M, dskip); sqe->dw9 = FIELD_PREP(HZIP_REQ_TYPE_M, req_type); - sqe->dest_avail_out = dlen; + sqe->dest_avail_out = dlen - dskip; sqe->source_addr_l = lower_32_bits(s_addr); sqe->source_addr_h = upper_32_bits(s_addr); sqe->dest_addr_l = lower_32_bits(d_addr); @@ -327,11 +327,6 @@ static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx, { struct hisi_zip_req_q *req_q = &qp_ctx->req_q; - if (qp_ctx->qp->alg_type == HZIP_ALG_TYPE_COMP) - kfree(req->dst); - else - kfree(req->src); - write_lock(&req_q->req_lock); clear_bit(req->req_id, req_q->req_bitmap); memset(req, 0, sizeof(struct hisi_zip_req)); @@ -359,8 +354,8 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data) } dlen = sqe->produced; - hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src); - hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst); + hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src); + hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst); head_size = (qp->alg_type == 0) ? 
TO_HEAD_SIZE(qp->req_type) : 0; acomp_req->dlen = dlen + head_size; @@ -454,20 +449,6 @@ static size_t get_comp_head_size(struct scatterlist *src, u8 req_type) } } -static int get_sg_skip_bytes(struct scatterlist *sgl, size_t bytes, - size_t remains, struct scatterlist **out) -{ -#define SPLIT_NUM 2 - size_t split_sizes[SPLIT_NUM]; - int out_mapped_nents[SPLIT_NUM]; - - split_sizes[0] = bytes; - split_sizes[1] = remains; - - return sg_split(sgl, 0, 0, SPLIT_NUM, split_sizes, out, - out_mapped_nents, GFP_KERNEL); -} - static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, struct hisi_zip_qp_ctx *qp_ctx, size_t head_size, bool is_comp) @@ -475,31 +456,7 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, struct hisi_zip_req_q *req_q = &qp_ctx->req_q; struct hisi_zip_req *q = req_q->q; struct hisi_zip_req *req_cache; - struct scatterlist *out[2]; - struct scatterlist *sgl; - size_t len; - int ret, req_id; - - /* - * remove/add zlib/gzip head, as hardware operations do not include - * comp head. so split req->src to get sgl without heads in acomp, or - * add comp head to req->dst ahead of that hardware output compressed - * data in sgl splited from req->dst without comp head. - */ - if (is_comp) { - sgl = req->dst; - len = req->dlen - head_size; - } else { - sgl = req->src; - len = req->slen - head_size; - } - - ret = get_sg_skip_bytes(sgl, head_size, len, out); - if (ret) - return ERR_PTR(ret); - - /* sgl for comp head is useless, so free it now */ - kfree(out[0]); + int req_id; write_lock(&req_q->req_lock); @@ -507,7 +464,6 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, if (req_id >= req_q->size) { write_unlock(&req_q->req_lock); dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n"); - kfree(out[1]); return ERR_PTR(-EBUSY); } set_bit(req_id, req_q->req_bitmap); @@ -515,16 +471,13 @@ static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req, req_cache = q + req_id; req_cache->req_id = req_id; req_cache->req = req; + if (is_comp) { - req_cache->src = req->src; - req_cache->dst = out[1]; - req_cache->slen = req->slen; - req_cache->dlen = req->dlen - head_size; + req_cache->sskip = 0; + req_cache->dskip = head_size; } else { - req_cache->src = out[1]; - req_cache->dst = req->dst; - req_cache->slen = req->slen - head_size; - req_cache->dlen = req->dlen; + req_cache->sskip = head_size; + req_cache->dskip = 0; } write_unlock(&req_q->req_lock); @@ -536,6 +489,7 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, struct hisi_zip_qp_ctx *qp_ctx) { struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe; + struct acomp_req *a_req = req->req; struct hisi_qp *qp = qp_ctx->qp; struct device *dev = &qp->qm->pdev->dev; struct hisi_acc_sgl_pool *pool = qp_ctx->sgl_pool; @@ -543,16 +497,16 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, dma_addr_t output; int ret; - if (!req->src || !req->slen || !req->dst || !req->dlen) + if (!a_req->src || !a_req->slen || !a_req->dst || !a_req->dlen) return -EINVAL; - req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->src, pool, + req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool, req->req_id << 1, &input); if (IS_ERR(req->hw_src)) return PTR_ERR(req->hw_src); req->dma_src = input; - req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->dst, pool, + req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->dst, pool, (req->req_id << 1) + 1, &output); if (IS_ERR(req->hw_dst)) { @@ -561,8 +515,8 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, } req->dma_dst = 
output; - hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, req->slen, - req->dlen); + hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, a_req->slen, + a_req->dlen, req->sskip, req->dskip); hisi_zip_config_buf_type(zip_sqe, HZIP_SGL); hisi_zip_config_tag(zip_sqe, req->req_id); @@ -574,9 +528,9 @@ static int hisi_zip_do_work(struct hisi_zip_req *req, return -EINPROGRESS; err_unmap_output: - hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst); + hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst); err_unmap_input: - hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src); + hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src); return ret; } diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 29da449b3e9e..d187312b9864 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1595,6 +1595,11 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ +static void spacc_tasklet_kill(void *data) +{ + tasklet_kill(data); +} + static int spacc_probe(struct platform_device *pdev) { int i, err, ret; @@ -1637,6 +1642,14 @@ static int spacc_probe(struct platform_device *pdev) return -ENXIO; } + tasklet_init(&engine->complete, spacc_spacc_complete, + (unsigned long)engine); + + ret = devm_add_action(&pdev->dev, spacc_tasklet_kill, + &engine->complete); + if (ret) + return ret; + if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, engine->name, engine)) { dev_err(engine->dev, "failed to request IRQ\n"); @@ -1694,8 +1707,6 @@ static int spacc_probe(struct platform_device *pdev) INIT_LIST_HEAD(&engine->completed); INIT_LIST_HEAD(&engine->in_progress); engine->in_flight = 0; - tasklet_init(&engine->complete, spacc_spacc_complete, - (unsigned long)engine); platform_set_drvdata(pdev, engine); diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c index d59e736882f6..9fee1b1532a4 100644 --- a/drivers/crypto/vmx/aes_xts.c +++ b/drivers/crypto/vmx/aes_xts.c @@ -84,6 +84,9 @@ static int p8_aes_xts_crypt(struct skcipher_request *req, int enc) u8 tweak[AES_BLOCK_SIZE]; int ret; + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) { struct skcipher_request *subreq = skcipher_request_ctx(req); diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 35535833b6f7..c7804635e89e 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -115,7 +115,8 @@ config ARM_TEGRA20_DEVFREQ config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" - depends on ARCH_ROCKCHIP + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ + (COMPILE_TEST && HAVE_ARM_SMCCC) select DEVFREQ_EVENT_ROCKCHIP_DFI select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_DEVFREQ_EVENT diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 57f6944d65a6..c5a34be182ca 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #define HZ_PER_KHZ 1000 static struct class *devfreq_class; +static struct dentry *devfreq_debugfs; /* * devfreq core provides delayed work based load monitoring helper @@ -209,10 +211,10 @@ static int set_freq_table(struct devfreq *devfreq) int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) { int lev, prev_lev, ret = 0; - unsigned long cur_time; + u64 cur_time; lockdep_assert_held(&devfreq->lock); - cur_time = jiffies; + 
cur_time = get_jiffies_64(); /* Immediately exit if previous_freq is not initialized yet. */ if (!devfreq->previous_freq) @@ -535,7 +537,7 @@ void devfreq_monitor_resume(struct devfreq *devfreq) msecs_to_jiffies(devfreq->profile->polling_ms)); out_update: - devfreq->last_stat_updated = jiffies; + devfreq->last_stat_updated = get_jiffies_64(); devfreq->stop_polling = false; if (devfreq->profile->get_cur_freq && @@ -736,7 +738,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - static atomic_t devfreq_no = ATOMIC_INIT(-1); int err = 0; if (!dev || !profile || !governor_name) { @@ -798,8 +799,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); atomic_set(&devfreq->suspend_count, 0); - dev_set_name(&devfreq->dev, "devfreq%d", - atomic_inc_return(&devfreq_no)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); @@ -820,7 +820,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->time_in_state = devm_kcalloc(&devfreq->dev, devfreq->profile->max_state, - sizeof(unsigned long), + sizeof(*devfreq->time_in_state), GFP_KERNEL); if (!devfreq->time_in_state) { mutex_unlock(&devfreq->lock); @@ -828,7 +828,7 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_devfreq; } - devfreq->last_stat_updated = jiffies; + devfreq->last_stat_updated = get_jiffies_64(); srcu_init_notifier_head(&devfreq->transition_notifier_list); @@ -1259,6 +1259,14 @@ int devfreq_remove_governor(struct devfreq_governor *governor) } EXPORT_SYMBOL(devfreq_remove_governor); +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + return sprintf(buf, "%s\n", dev_name(devfreq->dev.parent)); +} +static DEVICE_ATTR_RO(name); + static ssize_t governor_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1581,8 +1589,8 @@ static ssize_t trans_stat_show(struct device *dev, for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", devfreq->trans_table[(i * max_state) + j]); - len += sprintf(buf + len, "%10u\n", - jiffies_to_msecs(devfreq->time_in_state[i])); + len += sprintf(buf + len, "%10llu\n", (u64) + jiffies64_to_msecs(devfreq->time_in_state[i])); } len += sprintf(buf + len, "Total transition : %u\n", @@ -1592,6 +1600,7 @@ static ssize_t trans_stat_show(struct device *dev, static DEVICE_ATTR_RO(trans_stat); static struct attribute *devfreq_attrs[] = { + &dev_attr_name.attr, &dev_attr_governor.attr, &dev_attr_available_governors.attr, &dev_attr_cur_freq.attr, @@ -1605,6 +1614,81 @@ static struct attribute *devfreq_attrs[] = { }; ATTRIBUTE_GROUPS(devfreq); +/** + * devfreq_summary_show() - Show the summary of the devfreq devices + * @s: seq_file instance to show the summary of devfreq devices + * @data: not used + * + * Show the summary of the devfreq devices via 'devfreq_summary' debugfs file. + * It lets the user see the detailed information of the devfreq devices. + * + * Always returns 0 because it only shows information without any data change. + */
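devfreq_summary_show() below is wired up through DEFINE_SHOW_ATTRIBUTE(). As a self-contained sketch of that mechanism (the "foo" names are hypothetical): the macro generates a single_open()-based <name>_fops from a <name>_show() function, which is then handed to debugfs_create_file().

#include <linux/debugfs.h>
#include <linux/init.h>
#include <linux/seq_file.h>

static int foo_show(struct seq_file *s, void *data)
{
	seq_puts(s, "one line of read-only state\n");
	return 0;
}
DEFINE_SHOW_ATTRIBUTE(foo);	/* expands to foo_open() and foo_fops */

static struct dentry *foo_dir;

static int __init foo_debugfs_init(void)
{
	foo_dir = debugfs_create_dir("foo", NULL);
	debugfs_create_file("summary", 0444, foo_dir, NULL, &foo_fops);
	return 0;
}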
+static int devfreq_summary_show(struct seq_file *s, void *data) +{ + struct devfreq *devfreq; + struct devfreq *p_devfreq = NULL; + unsigned long cur_freq, min_freq, max_freq; + unsigned int polling_ms; + + seq_printf(s, "%-30s %-10s %-10s %-15s %10s %12s %12s %12s\n", + "dev_name", + "dev", + "parent_dev", + "governor", + "polling_ms", + "cur_freq_Hz", + "min_freq_Hz", + "max_freq_Hz"); + seq_printf(s, "%30s %10s %10s %15s %10s %12s %12s %12s\n", + "------------------------------", + "----------", + "----------", + "---------------", + "----------", + "------------", + "------------", + "------------"); + + mutex_lock(&devfreq_list_lock); + + list_for_each_entry_reverse(devfreq, &devfreq_list, node) { +#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) + if (!strncmp(devfreq->governor_name, DEVFREQ_GOV_PASSIVE, + DEVFREQ_NAME_LEN)) { + struct devfreq_passive_data *data = devfreq->data; + + if (data) + p_devfreq = data->parent; + } else { + p_devfreq = NULL; + } +#endif + + mutex_lock(&devfreq->lock); + cur_freq = devfreq->previous_freq; + get_freq_range(devfreq, &min_freq, &max_freq); + polling_ms = devfreq->profile->polling_ms; + mutex_unlock(&devfreq->lock); + + seq_printf(s, + "%-30s %-10s %-10s %-15s %10d %12ld %12ld %12ld\n", + dev_name(devfreq->dev.parent), + dev_name(&devfreq->dev), + p_devfreq ? dev_name(&p_devfreq->dev) : "null", + devfreq->governor_name, + polling_ms, + cur_freq, + min_freq, + max_freq); + } + + mutex_unlock(&devfreq_list_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(devfreq_summary); + static int __init devfreq_init(void) { devfreq_class = class_create(THIS_MODULE, "devfreq"); @@ -1621,6 +1705,11 @@ static int __init devfreq_init(void) } devfreq_class->dev_groups = devfreq_groups; + devfreq_debugfs = debugfs_create_dir("devfreq", NULL); + debugfs_create_file("devfreq_summary", 0444, + devfreq_debugfs, NULL, + &devfreq_summary_fops); + return 0; } subsys_initcall(devfreq_init); diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index cef2cf5347ca..a53e0a6ffdfe 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -34,7 +34,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU config DEVFREQ_EVENT_ROCKCHIP_DFI tristate "ROCKCHIP DFI DEVFREQ event Driver" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST help This add the devfreq-event driver for Rockchip SoC. It provides DFI (DDR Monitor Module) driver to count ddr load. diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index 85c7a77bf3f0..055deea42c37 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -101,17 +101,22 @@ static struct __exynos_ppmu_events { PPMU_EVENT(dmc1_1), }; -static int exynos_ppmu_find_ppmu_id(struct devfreq_event_dev *edev) +static int __exynos_ppmu_find_ppmu_id(const char *edev_name) { int i; for (i = 0; i < ARRAY_SIZE(ppmu_events); i++) - if (!strcmp(edev->desc->name, ppmu_events[i].name)) + if (!strcmp(edev_name, ppmu_events[i].name)) return ppmu_events[i].id; return -EINVAL; } +static int exynos_ppmu_find_ppmu_id(struct devfreq_event_dev *edev) +{ + return __exynos_ppmu_find_ppmu_id(edev->desc->name); +} + /* * The devfreq-event ops structure for PPMU v1.1 */ @@ -556,13 +561,11 @@ static int of_get_devfreq_events(struct device_node *np, * use default if not. */ if (info->ppmu_type == EXYNOS_TYPE_PPMU_V2) { - struct devfreq_event_dev edev; int id; /* Not all registers take the same value for * read+write data count. */
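The exynos-ppmu refactor above (completed by the call-site change just below) removes an on-stack struct devfreq_event_dev that existed only to satisfy the old helper's signature; the lookup itself is just a name search over the event table. A sketch under that reading, with made-up table entries:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>

struct ppmu_event {
	const char *name;
	int id;
};

static const struct ppmu_event ppmu_table[] = {
	{ "ppmu-event3-dmc0", 0 },	/* hypothetical entries */
	{ "ppmu-event3-dmc1", 1 },
};

static int find_ppmu_id_by_name(const char *name)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(ppmu_table); i++)
		if (!strcmp(name, ppmu_table[i].name))
			return ppmu_table[i].id;

	return -EINVAL;
}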
- edev.desc = &desc[j]; - id = exynos_ppmu_find_ppmu_id(&edev); + id = __exynos_ppmu_find_ppmu_id(desc[j].name); switch (id) { case PPMU_PMNCNT0: diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index ce41cd9b758a..2427398ff22a 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -108,6 +108,7 @@ static int dma_buf_release(struct inode *inode, struct file *file) dma_resv_fini(dmabuf->resv); module_put(dmabuf->owner); + kfree(dmabuf->name); kfree(dmabuf); return 0; } diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index e51d836afcc7..1092d4ce723e 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1947,8 +1947,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) return; } - spin_lock(&cohc->lock); - /* * When we reach this point, at least one queue item * should have been moved over from cohc->queue to @@ -1969,8 +1967,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - spin_unlock(&cohc->lock); - /* * This tasklet will remove items from cohc->active * and thus terminates them. diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index a0ee404b736e..f1d149e32839 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -830,6 +830,7 @@ static int axi_dmac_probe(struct platform_device *pdev) struct dma_device *dma_dev; struct axi_dmac *dmac; struct resource *res; + struct regmap *regmap; int ret; dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); @@ -921,10 +922,17 @@ static int axi_dmac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dmac); - devm_regmap_init_mmio(&pdev->dev, dmac->base, &axi_dmac_regmap_config); + regmap = devm_regmap_init_mmio(&pdev->dev, dmac->base, + &axi_dmac_regmap_config); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + goto err_free_irq; + } return 0; +err_free_irq: + free_irq(dmac->irq, dmac); err_unregister_of: of_dma_controller_free(pdev->dev.of_node); err_unregister_device: diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 03ac4b96117c..4b604086b1b3 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -179,7 +179,7 @@ __dma_device_satisfies_mask(struct dma_device *device, static struct module *dma_chan_to_owner(struct dma_chan *chan) { - return chan->device->dev->driver->owner; + return chan->device->owner; } /** @@ -919,6 +919,8 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + device->owner = device->dev->driver->owner; + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c index 89792083d62c..95cc0256b387 100644 --- a/drivers/dma/fsl-qdma.c +++ b/drivers/dma/fsl-qdma.c @@ -304,7 +304,7 @@ static void fsl_qdma_free_chan_resources(struct dma_chan *chan) vchan_dma_desc_free_list(&fsl_chan->vchan, &head); - if (!fsl_queue->comp_pool && !fsl_queue->comp_pool) + if (!fsl_queue->comp_pool && !fsl_queue->desc_pool) return; list_for_each_entry_safe(comp_temp, _comp_temp, diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index c27e206a764c..67736c801f3c 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1328,13 +1328,14 @@ static void sdma_free_chan_resources(struct dma_chan *chan) sdma_channel_synchronize(chan); - if (sdmac->event_id0) + if (sdmac->event_id0 >= 0) sdma_event_disable(sdmac,
sdmac->event_id0); if (sdmac->event_id1) sdma_event_disable(sdmac, sdmac->event_id1); sdmac->event_id0 = 0; sdmac->event_id1 = 0; + sdmac->context_loaded = false; sdma_set_channel_priority(sdmac, 0); @@ -1628,7 +1629,7 @@ static int sdma_config(struct dma_chan *chan, memcpy(&sdmac->slave_config, dmaengine_cfg, sizeof(*dmaengine_cfg)); /* Set ENBLn earlier to make sure dma request triggered after that */ - if (sdmac->event_id0) { + if (sdmac->event_id0 >= 0) { if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events) return -EINVAL; sdma_event_enable(sdmac, sdmac->event_id0); diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 3a45079d11ec..4a750e29bfb5 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -281,7 +281,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get( /* Do not allocate if desc are waiting for ack */ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { - if (async_tx_test_ack(&dma_desc->txd)) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { list_del(&dma_desc->node); spin_unlock_irqrestore(&tdc->lock, flags); dma_desc->txd.flags = 0; @@ -756,10 +756,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) bool was_busy; spin_lock_irqsave(&tdc->lock, flags); - if (list_empty(&tdc->pending_sg_req)) { - spin_unlock_irqrestore(&tdc->lock, flags); - return 0; - } if (!tdc->busy) goto skip_dma_stop; diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c index 756a3c951dc7..03a7f647f7b2 100644 --- a/drivers/dma/ti/edma.c +++ b/drivers/dma/ti/edma.c @@ -2289,13 +2289,6 @@ static int edma_probe(struct platform_device *pdev) if (!info) return -ENODEV; - pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - dev_err(dev, "pm_runtime_get_sync() failed\n"); - return ret; - } - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) return ret; @@ -2326,27 +2319,33 @@ static int edma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ecc); + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get_sync() failed\n"); + pm_runtime_disable(dev); + return ret; + } + /* Get eDMA3 configuration from IP */ ret = edma_setup_from_hw(dev, info, ecc); if (ret) - return ret; + goto err_disable_pm; /* Allocate memory based on the information we got from the IP */ ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels, sizeof(*ecc->slave_chans), GFP_KERNEL); - if (!ecc->slave_chans) - return -ENOMEM; ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots), sizeof(unsigned long), GFP_KERNEL); - if (!ecc->slot_inuse) - return -ENOMEM; ecc->channels_mask = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_channels), sizeof(unsigned long), GFP_KERNEL); - if (!ecc->channels_mask) - return -ENOMEM; + if (!ecc->slave_chans || !ecc->slot_inuse || !ecc->channels_mask) { + ret = -ENOMEM; + goto err_disable_pm; + } /* Mark all channels available initially */ bitmap_fill(ecc->channels_mask, ecc->num_channels); @@ -2388,7 +2387,7 @@ static int edma_probe(struct platform_device *pdev) ecc); if (ret) { dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret); - return ret; + goto err_disable_pm; } ecc->ccint = irq; } @@ -2404,7 +2403,7 @@ static int edma_probe(struct platform_device *pdev) ecc); if (ret) { dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret); - return ret; + goto err_disable_pm; } ecc->ccerrint = irq; } @@ -2412,7 +2411,8 @@ static int edma_probe(struct platform_device *pdev) ecc->dummy_slot = edma_alloc_slot(ecc, 
EDMA_SLOT_ANY); if (ecc->dummy_slot < 0) { dev_err(dev, "Can't allocate PaRAM dummy slot\n"); - return ecc->dummy_slot; + ret = ecc->dummy_slot; + goto err_disable_pm; } queue_priority_mapping = info->queue_priority_mapping; @@ -2512,6 +2512,9 @@ static int edma_probe(struct platform_device *pdev) err_reg1: edma_free_slot(ecc, ecc->dummy_slot); +err_disable_pm: + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); return ret; } @@ -2542,6 +2545,8 @@ static int edma_remove(struct platform_device *pdev) if (ecc->dma_memcpy) dma_async_device_unregister(ecc->dma_memcpy); edma_free_slot(ecc, ecc->dummy_slot); + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); return 0; } diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 7243b88f81d8..69e0d90460e6 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -505,16 +505,10 @@ void edac_mc_free(struct mem_ctl_info *mci) { edac_dbg(1, "\n"); - /* If we're not yet registered with sysfs free only what was allocated - * in edac_mc_alloc(). - */ - if (!device_is_registered(&mci->dev)) { - _edac_mc_free(mci); - return; - } + if (device_is_registered(&mci->dev)) + edac_unregister_sysfs(mci); - /* the mci instance is freed here, when the sysfs object is dropped */ - edac_unregister_sysfs(mci); + _edac_mc_free(mci); } EXPORT_SYMBOL_GPL(edac_mc_free); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0367554e7437..c70ec0a306d8 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -276,10 +276,7 @@ static const struct attribute_group *csrow_attr_groups[] = { static void csrow_attr_release(struct device *dev) { - struct csrow_info *csrow = container_of(dev, struct csrow_info, dev); - - edac_dbg(1, "device %s released\n", dev_name(dev)); - kfree(csrow); + /* release device with _edac_mc_free() */ } static const struct device_type csrow_attr_type = { @@ -447,8 +444,7 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) csrow = mci->csrows[i]; if (!nr_pages_per_csrow(csrow)) continue; - - device_del(&mci->csrows[i]->dev); + device_unregister(&mci->csrows[i]->dev); } return err; @@ -608,10 +604,7 @@ static const struct attribute_group *dimm_attr_groups[] = { static void dimm_attr_release(struct device *dev) { - struct dimm_info *dimm = container_of(dev, struct dimm_info, dev); - - edac_dbg(1, "device %s released\n", dev_name(dev)); - kfree(dimm); + /* release device with _edac_mc_free() */ } static const struct device_type dimm_attr_type = { @@ -893,10 +886,7 @@ static const struct attribute_group *mci_attr_groups[] = { static void mci_attr_release(struct device *dev) { - struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); - - edac_dbg(1, "device %s released\n", dev_name(dev)); - kfree(mci); + /* release device with _edac_mc_free() */ } static const struct device_type mci_attr_type = { diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c index c0cc72a3b2be..3a3dcb14ed99 100644 --- a/drivers/edac/sifive_edac.c +++ b/drivers/edac/sifive_edac.c @@ -54,8 +54,8 @@ static int ecc_register(struct platform_device *pdev) p->dci = edac_device_alloc_ctl_info(0, "sifive_ecc", 1, "sifive_ecc", 1, 1, NULL, 0, edac_device_alloc_index()); - if (IS_ERR(p->dci)) - return PTR_ERR(p->dci); + if (!p->dci) + return -ENOMEM; p->dci->dev = &pdev->dev; p->dci->mod_name = "Sifive ECC Manager"; diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 95662a4ff4c4..99bbaf629b8d 100644 --- a/drivers/edac/skx_common.c +++ 
b/drivers/edac/skx_common.c @@ -256,7 +256,7 @@ int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm) pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL); if (!pdev) { - skx_printk(KERN_ERR, "Can't get tolm/tohm\n"); + edac_dbg(2, "Can't get tolm/tohm\n"); return -ENODEV; } diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 2d263382d797..880ffd833718 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -479,20 +479,14 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) pinf = &p->ceinfo; if (!priv->p_data->quirks) { snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "DDR ECC error type:%s Row %d Bank %d Col %d ", - "CE", pinf->row, pinf->bank, pinf->col); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "Bit Position: %d Data: 0x%08x\n", + "DDR ECC error type:%s Row %d Bank %d Col %d Bit Position: %d Data: 0x%08x", + "CE", pinf->row, pinf->bank, pinf->col, pinf->bitpos, pinf->data); } else { snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "DDR ECC error type:%s Row %d Bank %d Col %d ", - "CE", pinf->row, pinf->bank, pinf->col); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "BankGroup Number %d Block Number %d ", - pinf->bankgrpnr, pinf->blknr); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "Bit Position: %d Data: 0x%08x\n", + "DDR ECC error type:%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d Bit Position: %d Data: 0x%08x", + "CE", pinf->row, pinf->bank, pinf->col, + pinf->bankgrpnr, pinf->blknr, pinf->bitpos, pinf->data); } @@ -509,10 +503,8 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p) "UE", pinf->row, pinf->bank, pinf->col); } else { snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "DDR ECC error type :%s Row %d Bank %d Col %d ", - "UE", pinf->row, pinf->bank, pinf->col); - snprintf(priv->message, SYNPS_EDAC_MSG_SIZE, - "BankGroup Number %d Block Number %d", + "DDR ECC error type :%s Row %d Bank %d Col %d BankGroup Number %d Block Number %d", + "UE", pinf->row, pinf->bank, pinf->col, pinf->bankgrpnr, pinf->blknr); } diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 904fa09e6a6b..d99f5b0c8a09 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -10,10 +10,12 @@ #define pr_fmt(fmt) "efi: " fmt #include +#include #include #include #include #include +#include #include #include #include @@ -276,15 +278,112 @@ void __init efi_init(void) efi_memmap_unmap(); } +static bool efifb_overlaps_pci_range(const struct of_pci_range *range) +{ + u64 fb_base = screen_info.lfb_base; + + if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) + fb_base |= (u64)(unsigned long)screen_info.ext_lfb_base << 32; + + return fb_base >= range->cpu_addr && + fb_base < (range->cpu_addr + range->size); +} + +static struct device_node *find_pci_overlap_node(void) +{ + struct device_node *np; + + for_each_node_by_type(np, "pci") { + struct of_pci_range_parser parser; + struct of_pci_range range; + int err; + + err = of_pci_range_parser_init(&parser, np); + if (err) { + pr_warn("of_pci_range_parser_init() failed: %d\n", err); + continue; + } + + for_each_of_pci_range(&parser, &range) + if (efifb_overlaps_pci_range(&range)) + return np; + } + return NULL; +} + +/* + * If the efifb framebuffer is backed by a PCI graphics controller, we have + * to ensure that this relation is expressed using a device link when + * running in DT mode, or the probe order may be reversed, resulting in a + * resource 
reservation conflict on the memory window that the efifb + * framebuffer steals from the PCIe host bridge. + */ +static int efifb_add_links(const struct fwnode_handle *fwnode, + struct device *dev) +{ + struct device_node *sup_np; + struct device *sup_dev; + + sup_np = find_pci_overlap_node(); + + /* + * If there's no PCI graphics controller backing the efifb, we are + * done here. + */ + if (!sup_np) + return 0; + + sup_dev = get_dev_from_fwnode(&sup_np->fwnode); + of_node_put(sup_np); + + /* + * Return -ENODEV if the PCI graphics controller device hasn't been + * registered yet. This ensures that efifb isn't allowed to probe + * and this function is retried again when new devices are + * registered. + */ + if (!sup_dev) + return -ENODEV; + + /* + * If this fails, retrying this function at a later point won't + * change anything. So, don't return an error after this. + */ + if (!device_link_add(dev, sup_dev, 0)) + dev_warn(dev, "device_link_add() failed\n"); + + put_device(sup_dev); + + return 0; +} + +static const struct fwnode_operations efifb_fwnode_ops = { + .add_links = efifb_add_links, +}; + +static struct fwnode_handle efifb_fwnode = { + .ops = &efifb_fwnode_ops, +}; + static int __init register_gop_device(void) { - void *pd; + struct platform_device *pd; + int err; if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) return 0; - pd = platform_device_register_data(NULL, "efi-framebuffer", 0, - &screen_info, sizeof(screen_info)); - return PTR_ERR_OR_ZERO(pd); + pd = platform_device_alloc("efi-framebuffer", 0); + if (!pd) + return -ENOMEM; + + if (IS_ENABLED(CONFIG_PCI)) + pd->dev.fwnode = &efifb_fwnode; + + err = platform_device_add_data(pd, &screen_info, sizeof(screen_info)); + if (err) + return err; + + return platform_device_add(pd); } subsys_initcall(register_gop_device); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 2b02cb165f16..a9778591341b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -552,7 +552,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, seed = early_memremap(efi.rng_seed, sizeof(*seed)); if (seed != NULL) { - size = seed->size; + size = READ_ONCE(seed->size); early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); @@ -562,7 +562,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, sizeof(*seed) + size); if (seed != NULL) { pr_notice("seeding entropy pool\n"); - add_bootloader_randomness(seed->bits, seed->size); + add_bootloader_randomness(seed->bits, size); early_memunmap(seed, sizeof(*seed) + size); } else { pr_err("Could not map UEFI random seed!\n"); diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 7576450c8254..aff3dfb4d7ba 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -83,13 +83,16 @@ static ssize_t efivar_attr_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) @@ -116,13 +119,16 @@ static ssize_t efivar_size_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str 
= buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; str += sprintf(str, "0x%lx\n", var->DataSize); @@ -133,12 +139,15 @@ static ssize_t efivar_data_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; memcpy(buf, var->Data, var->DataSize); @@ -199,6 +208,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; + if (!entry || !buf) + return -EINVAL; + if (in_compat_syscall()) { struct compat_efi_variable *compat; @@ -250,14 +262,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; struct compat_efi_variable *compat; + unsigned long datasize = sizeof(var->Data); size_t size; + int ret; if (!entry || !buf) return 0; - var->DataSize = 1024; - if (efivar_entry_get(entry, &entry->var.Attributes, - &entry->var.DataSize, entry->var.Data)) + ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data); + var->DataSize = datasize; + if (ret) return -EIO; if (in_compat_syscall()) { diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c index 03b43b7a6d1d..f71eaa5bf52d 100644 --- a/drivers/firmware/imx/imx-scu.c +++ b/drivers/firmware/imx/imx-scu.c @@ -29,6 +29,7 @@ struct imx_sc_chan { struct mbox_client cl; struct mbox_chan *ch; int idx; + struct completion tx_done; }; struct imx_sc_ipc { @@ -100,6 +101,14 @@ int imx_scu_get_handle(struct imx_sc_ipc **ipc) } EXPORT_SYMBOL(imx_scu_get_handle); +/* Callback called when the word of a message is ack-ed, e.g. read by SCU */ +static void imx_scu_tx_done(struct mbox_client *cl, void *mssg, int r) +{ + struct imx_sc_chan *sc_chan = container_of(cl, struct imx_sc_chan, cl); + + complete(&sc_chan->tx_done); +} + static void imx_scu_rx_callback(struct mbox_client *c, void *msg) { struct imx_sc_chan *sc_chan = container_of(c, struct imx_sc_chan, cl); @@ -149,6 +158,19 @@ static int imx_scu_ipc_write(struct imx_sc_ipc *sc_ipc, void *msg) for (i = 0; i < hdr->size; i++) { sc_chan = &sc_ipc->chans[i % 4]; + + /* + * SCU requires that all message words are written + * sequentially but the Linux MU driver implements multiple + * independent channels for each register so ordering between + * different channels must be ensured by the SCU API interface. + * + * Wait for tx_done before every send to ensure that no + * queueing happens at the mailbox channel level. + */
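A distilled sketch of the handshake that comment describes, using the generic completion API (chan_ctx and its helpers are illustrative, not the i.MX driver's symbols): the completion starts out "done", each send blocks until the previous word was consumed, then re-arms before handing the next word to the mailbox.

#include <linux/completion.h>

struct chan_ctx {
	struct completion tx_done;
};

static void chan_ctx_init(struct chan_ctx *c)
{
	init_completion(&c->tx_done);
	complete(&c->tx_done);		/* nothing in flight yet */
}

/* invoked from the mailbox client's tx-done callback */
static void chan_ctx_tx_done(struct chan_ctx *c)
{
	complete(&c->tx_done);
}

static void chan_ctx_send_word(struct chan_ctx *c)
{
	wait_for_completion(&c->tx_done);	/* previous word consumed */
	reinit_completion(&c->tx_done);
	/* hand the next word to the mailbox channel here */
}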
+ wait_for_completion(&sc_chan->tx_done); + reinit_completion(&sc_chan->tx_done); + ret = mbox_send_message(sc_chan->ch, &data[i]); if (ret < 0) return ret; @@ -247,6 +269,11 @@ static int imx_scu_probe(struct platform_device *pdev) cl->knows_txdone = true; cl->rx_callback = imx_scu_rx_callback; + /* Initial tx_done completion as "done" */ + cl->tx_done = imx_scu_tx_done; + init_completion(&sc_chan->tx_done); + complete(&sc_chan->tx_done); + sc_chan->sc_ipc = sc_ipc; sc_chan->idx = i % 4; sc_chan->ch = mbox_request_channel_byname(cl, chan_name); diff --git a/drivers/firmware/imx/misc.c b/drivers/firmware/imx/misc.c index 4b56a587dacd..d073cb3ce699 100644 --- a/drivers/firmware/imx/misc.c +++ b/drivers/firmware/imx/misc.c @@ -16,7 +16,7 @@ struct imx_sc_msg_req_misc_set_ctrl { u32 ctrl; u32 val; u16 resource; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_req_cpu_start { struct imx_sc_rpc_msg hdr; @@ -24,18 +24,18 @@ struct imx_sc_msg_req_cpu_start { u32 address_lo; u16 resource; u8 enable; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_req_misc_get_ctrl { struct imx_sc_rpc_msg hdr; u32 ctrl; u16 resource; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_resp_misc_get_ctrl { struct imx_sc_rpc_msg hdr; u32 val; -} __packed; +} __packed __aligned(4); /* * This function sets a miscellaneous control value. diff --git a/drivers/firmware/imx/scu-pd.c b/drivers/firmware/imx/scu-pd.c index b556612207e5..af3ae0087de4 100644 --- a/drivers/firmware/imx/scu-pd.c +++ b/drivers/firmware/imx/scu-pd.c @@ -61,7 +61,7 @@ struct imx_sc_msg_req_set_resource_power_mode { struct imx_sc_rpc_msg hdr; u16 resource; u8 mode; -} __packed; +} __packed __aligned(4); #define IMX_SCU_PD_NAME_SIZE 20 struct imx_sc_pm_domain { diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig index 92ce6d85802c..4cc0e630ab79 100644 --- a/drivers/fsi/Kconfig +++ b/drivers/fsi/Kconfig @@ -55,6 +55,7 @@ config FSI_MASTER_AST_CF config FSI_MASTER_ASPEED tristate "FSI ASPEED master" + depends on HAS_IOMEM help This option enables a FSI master that is present behind an OPB bridge in the AST2600.
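The misc.c and scu-pd.c hunks above all make the same change, and the reason generalizes: __packed drops a struct's alignment requirement to 1, so instances may be placed at odd addresses and the compiler may fall back to byte-wise accesses; adding __aligned(4) restores 32-bit alignment while keeping the members densely packed. A self-contained illustration with a hypothetical rpc_msg layout:

#include <linux/build_bug.h>
#include <linux/types.h>

struct rpc_msg {			/* hypothetical message layout */
	u32 ctrl;
	u32 val;
	u16 resource;
} __packed __aligned(4);

static_assert(sizeof(struct rpc_msg) == 10);	  /* still packed */
static_assert(__alignof__(struct rpc_msg) == 4);  /* but word-aligned */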
diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index 08234e64993a..3224933f4c8f 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -253,17 +253,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, lirq->irq = irq; uirq = &priv->uirqs[lirq->index]; if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, dev_name(priv->dev), priv); if (ret) { dev_err(priv->dev, "Could not request underlying irq %d\n", uirq->uirq); - - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); - return ret; } + spin_lock_irqsave(&priv->gc.bgpio_lock, flags); } uirq->refcnt++; @@ -309,8 +308,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) if (index >= 0) { uirq = &priv->uirqs[lirq->index]; uirq->refcnt--; - if (uirq->refcnt == 0) + if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); free_irq(uirq->uirq, priv); + return; + } } spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index a9748b5198e6..67f9f82e0db0 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -147,9 +147,10 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, for (i = 0; i < gc->ngpio; i++) { if (*mask == 0) break; + /* Once finished with an index write it out to the register */ if (index != xgpio_index(chip, i)) { xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET + - xgpio_regoffset(chip, i), + index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]); spin_unlock_irqrestore(&chip->gpio_lock[index], flags); index = xgpio_index(chip, i); @@ -165,7 +166,7 @@ static void xgpio_set_multiple(struct gpio_chip *gc, unsigned long *mask, } xgpio_writereg(chip->regs + XGPIO_DATA_OFFSET + - xgpio_regoffset(chip, i), chip->gpio_state[index]); + index * XGPIO_CHANNEL_OFFSET, chip->gpio_state[index]); spin_unlock_irqrestore(&chip->gpio_lock[index], flags); } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index b696e4598a24..b0e79bed5952 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -147,10 +147,6 @@ static void of_gpio_flags_quirks(struct device_node *np, if (of_property_read_bool(np, "cd-inverted")) *flags ^= OF_GPIO_ACTIVE_LOW; } - if (!strcmp(propname, "wp-gpios")) { - if (of_property_read_bool(np, "wp-inverted")) - *flags ^= OF_GPIO_ACTIVE_LOW; - } } /* * Some GPIO fixed regulator quirks. 
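The grgpio change above is needed because request_irq() can sleep and so must not run under a spinlock. A minimal sketch of the resulting shape (names are illustrative): drop the lock around the sleeping call and retake it before touching the shared refcount again; as in the patch, callers must ensure map and unmap of the same interrupt do not race across the unlocked window.

#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(map_lock);

static int map_shared_irq(unsigned int irq, irq_handler_t handler,
			  void *priv, unsigned int *refcnt)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&map_lock, flags);
	if (*refcnt == 0) {
		spin_unlock_irqrestore(&map_lock, flags);
		ret = request_irq(irq, handler, 0, "shared-irq-sketch", priv);
		if (ret)
			return ret;	/* nothing to undo yet */
		spin_lock_irqsave(&map_lock, flags);
	}
	(*refcnt)++;
	spin_unlock_irqrestore(&map_lock, flags);

	return 0;
}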
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 78a16e42f222..175c6363cf61 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2053,6 +2053,7 @@ static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d, parent_type); chip_info(gc, "alloc_irqs_parent for %d parent hwirq %d\n", irq, parent_hwirq); + irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key); ret = irq_domain_alloc_irqs_parent(d, irq, 1, &parent_fwspec); if (ret) chip_err(gc, @@ -3371,6 +3372,17 @@ int gpiod_is_active_low(const struct gpio_desc *desc) } EXPORT_SYMBOL_GPL(gpiod_is_active_low); +/** + * gpiod_toggle_active_low - toggle whether a GPIO is active-low or not + * @desc: the gpio descriptor to change + */ +void gpiod_toggle_active_low(struct gpio_desc *desc) +{ + VALIDATE_DESC_VOID(desc); + change_bit(FLAG_ACTIVE_LOW, &desc->flags); +} +EXPORT_SYMBOL_GPL(gpiod_toggle_active_low); + /* I/O calls are only valid after configuration completed; the relevant * "is this a valid GPIO" error checks should already have been done. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 72232fccf61a..9ba80d828876 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -338,17 +338,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -373,15 +365,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 8e6726e0d035..7ab386deb52a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -694,11 +694,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, ssize_t result = 0; uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - if (size & 3 || *pos & 3) + if (size > 4096 || size & 3 || *pos & 3) return -EINVAL; /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); + offset = (*pos & GENMASK_ULL(11, 0)) >> 2; se = (*pos & GENMASK_ULL(19, 12)) >> 12; sh = (*pos & GENMASK_ULL(27, 20)) >> 20; cu = (*pos & GENMASK_ULL(35, 28)) >> 28; @@ -729,7 +729,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = data[offset++]; + value = data[result >> 2]; r = put_user(value, (uint32_t *)buf); if (r) { result = r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c17505fba988..332b9c24a2cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3639,8 +3639,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_amdkfd_pre_reset(adev); - /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 30a1e3ac21d6..4169abc32219 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1357,7 +1357,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_ATOMIC | + DRIVER_ATOMIC | DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index b499a3de8bb6..c75cc97eca44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -192,6 +192,7 @@ struct amdgpu_gmc { uint32_t srbm_soft_reset; bool prt_warning; uint64_t stolen_size; + uint32_t sdpif_register; /* apertures */ u64 shared_aperture_start; u64 shared_aperture_end; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ba9e53a1abc3..1a80423b1d4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -54,7 +54,7 @@ * In bring-up phase, it just used primary ring so set gfx ring number as 1 at * first. */ -#define GFX10_NUM_GFX_RINGS 2 +#define GFX10_NUM_GFX_RINGS_NV1X 1 #define GFX10_MEC_HPD_SIZE 2048 #define F32_CE_PROGRAM_RAM_SIZE 65536 @@ -1286,7 +1286,7 @@ static int gfx_v10_0_sw_init(void *handle) case CHIP_NAVI14: case CHIP_NAVI12: adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 2; + adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -2692,18 +2692,20 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_commit(ring); /* submit cs packet to copy state 0 to next available state */ - ring = &adev->gfx.gfx_ring[1]; - r = amdgpu_ring_alloc(ring, 2); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); - return r; - } - - amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); - amdgpu_ring_write(ring, 0); + if (adev->gfx.num_gfx_rings > 1) { + /* maximum supported gfx ring is 2 */ + ring = &adev->gfx.gfx_ring[1]; + r = amdgpu_ring_alloc(ring, 2); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); + return r; + } - amdgpu_ring_commit(ring); + amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_commit(ring); + } return 0; } @@ -2800,39 +2802,41 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); /* Init gfx ring 1 for pipe 1 */ - mutex_lock(&adev->srbm_mutex); - gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); - ring = &adev->gfx.gfx_ring[1]; - rb_bufsz = order_base_2(ring->ring_size / 8); - tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); - WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); - /* Initialize the ring buffer's write pointers */ - ring->wptr = 0; - WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); - 
WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); - /* Set the wb address wether it's enabled or not */ - rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); - WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & - CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, - upper_32_bits(wptr_gpu_addr)); - - mdelay(1); - WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); - - rb_addr = ring->gpu_addr >> 8; - WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); - WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); - WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); - - gfx_v10_0_cp_gfx_set_doorbell(adev, ring); - mutex_unlock(&adev->srbm_mutex); - + if (adev->gfx.num_gfx_rings > 1) { + mutex_lock(&adev->srbm_mutex); + gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID1); + /* maximum supported gfx ring is 2 */ + ring = &adev->gfx.gfx_ring[1]; + rb_bufsz = order_base_2(ring->ring_size / 8); + tmp = REG_SET_FIELD(0, CP_RB1_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, CP_RB1_CNTL, RB_BLKSZ, rb_bufsz - 2); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + /* Initialize the ring buffer's write pointers */ + ring->wptr = 0; + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, mmCP_RB1_WPTR_HI, upper_32_bits(ring->wptr)); + /* Set the wb address whether it's enabled or not */ + rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR, lower_32_bits(rptr_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & + CP_RB1_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + + mdelay(1); + WREG32_SOC15(GC, 0, mmCP_RB1_CNTL, tmp); + + rb_addr = ring->gpu_addr >> 8; + WREG32_SOC15(GC, 0, mmCP_RB1_BASE, rb_addr); + WREG32_SOC15(GC, 0, mmCP_RB1_BASE_HI, upper_32_bits(rb_addr)); + WREG32_SOC15(GC, 0, mmCP_RB1_ACTIVE, 1); + + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + mutex_unlock(&adev->srbm_mutex); + } /* Switch to pipe 0 */ mutex_lock(&adev->srbm_mutex); gfx_v10_0_cp_gfx_switch_pipe(adev, PIPE_ID0); @@ -3492,6 +3496,7 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); amdgpu_ring_clear_ring(ring); } else { amdgpu_ring_clear_ring(ring); @@ -3909,11 +3914,13 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); return clock; } @@ -3950,7 +3957,8 @@ static int gfx_v10_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS; + adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v10_0_set_kiq_pm4_funcs(adev); diff
--git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 97105a5bb246..67f30fec94df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3538,6 +3538,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0); amdgpu_ring_clear_ring(ring); } else { amdgpu_ring_clear_ring(ring); @@ -3852,6 +3853,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; + amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { uint32_t tmp, lsb, msb, i = 0; @@ -3870,6 +3872,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); } mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); return clock; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index a5b68b5e452f..0b88c9f877ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1203,6 +1203,19 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) } } +/** + * gmc_v9_0_restore_registers - restores regs + * + * @adev: amdgpu_device pointer + * + * This restores register values, saved at suspend. + */ +static void gmc_v9_0_restore_registers(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_RAVEN) + WREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register); +} + /** * gmc_v9_0_gart_enable - gart enable * @@ -1307,6 +1320,20 @@ static int gmc_v9_0_hw_init(void *handle) return r; } +/** + * gmc_v9_0_save_registers - saves regs + * + * @adev: amdgpu_device pointer + * + * This saves potential register values that should be + * restored upon resume + */ +static void gmc_v9_0_save_registers(struct amdgpu_device *adev) +{ + if (adev->asic_type == CHIP_RAVEN) + adev->gmc.sdpif_register = RREG32(mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); +} + /** * gmc_v9_0_gart_disable - gart disable * @@ -1343,9 +1370,16 @@ static int gmc_v9_0_hw_fini(void *handle) static int gmc_v9_0_suspend(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return gmc_v9_0_hw_fini(adev); + r = gmc_v9_0_hw_fini(adev); + if (r) + return r; + + gmc_v9_0_save_registers(adev); + + return 0; } static int gmc_v9_0_resume(void *handle) @@ -1353,6 +1387,7 @@ static int gmc_v9_0_resume(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v9_0_restore_registers(adev); r = gmc_v9_0_hw_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 0d8767eb7a70..1c3a7d4bb65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -269,7 +269,11 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery for world switch failure if no TDR */ - if (amdgpu_device_should_recover_gpu(adev)) + if (amdgpu_device_should_recover_gpu(adev) + && (adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT || + adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || + adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || + adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) amdgpu_device_gpu_recover(adev, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 
0ba66bef5746..de40bf12c4a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -701,6 +701,12 @@ static int nv_common_early_init(void *handle) adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_ATHUB; + /* guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * as a consequence, the rev_id and external_rev_id are wrong. + * workaround it by hardcoding rev_id to 0 (default value). + */ + if (amdgpu_sriov_vf(adev)) + adev->rev_id = 0; adev->external_rev_id = adev->rev_id + 0xa; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8e1640bc07af..624e223175c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -270,7 +270,12 @@ static u32 soc15_get_config_memsize(struct amdgpu_device *adev) static u32 soc15_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + if (adev->asic_type == CHIP_RAVEN) + return reference_clock / 4; + + return reference_clock; } @@ -1145,9 +1150,7 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_VCN | - AMD_PG_SUPPORT_VCN_DPG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } else if (adev->pdev->device == 0x15d8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -1190,9 +1193,7 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_VCN | - AMD_PG_SUPPORT_VCN_DPG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } break; case CHIP_ARCTURUS: diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 839f186e1182..19e870c79896 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -52,6 +52,7 @@ uint32_t old_ = 0; \ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ uint32_t loop = adev->usec_timeout; \ + ret = 0; \ while ((tmp_ & (mask)) != (expected_value)) { \ if (old_ != tmp_) { \ loop = adev->usec_timeout; \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 15c523027285..511712c2e382 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -93,7 +93,7 @@ void kfd_debugfs_init(void) kfd_debugfs_hqds_by_device, &kfd_debugfs_fops); debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, kfd_debugfs_rls_by_device, &kfd_debugfs_fops); - debugfs_create_file("hang_hws", S_IFREG | 0644, debugfs_root, + debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root, NULL, &kfd_debugfs_hang_hws_fops); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 984c2f2b24b6..d128a8bbe19d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1225,16 +1225,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(&q->list, &qpd->queues_list); qpd->queue_count++; + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; + if (q->properties.is_active) { 
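
The reordering in this hunk bumps dqm->sdma_queue_count (and its XGMI twin) before execute_queues_cpsch() runs, so the runlist is rebuilt from counters that already include the new queue. A minimal model of that ordering requirement, with hypothetical types standing in for the KFD structures:

    #include <stdio.h>

    struct dqm_counters {
        int queue_count;      /* active queues only */
        int sdma_queue_count; /* all SDMA queues, active or not */
    };

    /* Stand-in for execute_queues_cpsch(): whatever it programs must be
     * derived from counters that are already up to date. */
    static void commit_runlist(const struct dqm_counters *c)
    {
        printf("runlist built with %d active / %d sdma queues\n",
               c->queue_count, c->sdma_queue_count);
    }

    static void add_sdma_queue(struct dqm_counters *c, int is_active)
    {
        c->sdma_queue_count++;    /* bookkeeping first... */
        if (is_active) {
            c->queue_count++;
            commit_runlist(c);    /* ...then commit to the scheduler */
        }
    }

    int main(void)
    {
        struct dqm_counters c = { 0, 0 };

        add_sdma_queue(&c, 1);
        return 0;
    }
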
dqm->queue_count++; retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 11e5784aa62a..34f483ac36ca 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -97,8 +97,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps( (struct edid *) edid->raw_edid); sad_count = drm_edid_to_sad((struct edid *) edid->raw_edid, &sads); - if (sad_count < 0) - DRM_ERROR("Couldn't read SADs: %d\n", sad_count); if (sad_count <= 0) return result; @@ -246,7 +244,8 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( drm_dp_mst_reset_vcpi_slots(mst_mgr, mst_port); } - ret = drm_dp_update_payload_part1(mst_mgr); + /* It's OK for this to fail */ + drm_dp_update_payload_part1(mst_mgr); /* mst_mgr->->payloads are VC payload notify MST branch using DPCD or * AUX message. The sequence is slot 1-63 allocated sequence for each @@ -255,9 +254,6 @@ bool dm_helpers_dp_mst_write_payload_allocation_table( get_payload_table(aconnector, proposed_table); - if (ret) - return false; - return true; } @@ -315,7 +311,6 @@ bool dm_helpers_dp_mst_send_payload_allocation( struct amdgpu_dm_connector *aconnector; struct drm_dp_mst_topology_mgr *mst_mgr; struct drm_dp_mst_port *mst_port; - int ret; aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; @@ -329,10 +324,8 @@ bool dm_helpers_dp_mst_send_payload_allocation( if (!mst_mgr->mst_state) return false; - ret = drm_dp_update_payload_part2(mst_mgr); - - if (ret) - return false; + /* It's OK for this to fail */ + drm_dp_update_payload_part2(mst_mgr); if (!enable) drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 2bf8534c18fb..1e3bc708b2e8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -382,6 +382,7 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, aconnector->dc_sink); dc_sink_release(aconnector->dc_sink); aconnector->dc_sink = NULL; + aconnector->dc_link->cur_link_settings.lane_count = 0; } drm_connector_unregister(connector); diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 9b2cb57bf2ba..c9a241fe46cf 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -1438,6 +1438,7 @@ void dcn_bw_update_from_pplib(struct dc *dc) struct dc_context *ctx = dc->ctx; struct dm_pp_clock_levels_with_voltage fclks = {0}, dcfclks = {0}; bool res; + unsigned vmin0p65_idx, vmid0p72_idx, vnom0p8_idx, vmax0p9_idx; /* TODO: This is not the proper way to obtain fabric_and_dram_bandwidth, should be min(fclk, memclk) */ res = dm_pp_get_clock_levels_by_type_with_voltage( @@ -1449,17 +1450,28 @@ void dcn_bw_update_from_pplib(struct dc *dc) res = verify_clock_values(&fclks); if (res) { - ASSERT(fclks.num_levels >= 3); - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 32 * 
(fclks.data[0].clocks_in_khz / 1000.0) / 1000.0; - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = dc->dcn_soc->number_of_channels * - (fclks.data[fclks.num_levels - (fclks.num_levels > 2 ? 3 : 2)].clocks_in_khz / 1000.0) - * ddr4_dram_factor_single_Channel / 1000.0; - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = dc->dcn_soc->number_of_channels * - (fclks.data[fclks.num_levels - 2].clocks_in_khz / 1000.0) - * ddr4_dram_factor_single_Channel / 1000.0; - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = dc->dcn_soc->number_of_channels * - (fclks.data[fclks.num_levels - 1].clocks_in_khz / 1000.0) - * ddr4_dram_factor_single_Channel / 1000.0; + ASSERT(fclks.num_levels); + + vmin0p65_idx = 0; + vmid0p72_idx = fclks.num_levels - + (fclks.num_levels > 2 ? 3 : (fclks.num_levels > 1 ? 2 : 1)); + vnom0p8_idx = fclks.num_levels - (fclks.num_levels > 1 ? 2 : 1); + vmax0p9_idx = fclks.num_levels - 1; + + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = + 32 * (fclks.data[vmin0p65_idx].clocks_in_khz / 1000.0) / 1000.0; + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = + dc->dcn_soc->number_of_channels * + (fclks.data[vmid0p72_idx].clocks_in_khz / 1000.0) + * ddr4_dram_factor_single_Channel / 1000.0; + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = + dc->dcn_soc->number_of_channels * + (fclks.data[vnom0p8_idx].clocks_in_khz / 1000.0) + * ddr4_dram_factor_single_Channel / 1000.0; + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = + dc->dcn_soc->number_of_channels * + (fclks.data[vmax0p9_idx].clocks_in_khz / 1000.0) + * ddr4_dram_factor_single_Channel / 1000.0; } else BREAK_TO_DEBUGGER(); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index b864869cc7e3..6fa7422c51da 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -91,6 +91,12 @@ ifdef CONFIG_DRM_AMD_DC_DCN2_1 ############################################################################### CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o +# prevent build errors regarding soft-float vs hard-float FP ABI tags +# this code is currently unused on ppc64, as it applies to Renoir APUs only +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) +endif + AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 25d7b7c6681c..7dca2e6eb3bc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -100,13 +100,13 @@ uint32_t dentist_get_did_from_divider(int divider) } void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, - struct dc_state *context) + struct dc_state *context, bool safe_to_lower) { int i; clk_mgr->dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz; for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { - int dpp_inst, dppclk_khz; + int dpp_inst, dppclk_khz, prev_dppclk_khz; /* Loop index will match dpp->inst if resource exists, * and we want to avoid dependency on dpp object @@ -114,8 +114,12 @@ void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, dpp_inst = i; dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; - clk_mgr->dccg->funcs->update_dpp_dto( - clk_mgr->dccg, dpp_inst, dppclk_khz); 
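
The replacement lines that follow gate each per-pipe DTO write on safe_to_lower: raising a pipe's dppclk is always allowed, while lowering it is deferred until the transition makes that safe. A small sketch of the guard, with invented helper names:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for dccg->funcs->update_dpp_dto(). */
    static void program_dto(int pipe, int khz)
    {
        printf("pipe %d -> %d kHz\n", pipe, khz);
    }

    /* Mirror of the guard added in the hunk: skip the write when it
     * would lower the clock before it is safe to do so. */
    static void update_pipe_dto(int pipe, int prev_khz, int new_khz,
                                bool safe_to_lower)
    {
        if (safe_to_lower || prev_khz < new_khz)
            program_dto(pipe, new_khz);
    }

    int main(void)
    {
        update_pipe_dto(0, 600000, 400000, false); /* skipped */
        update_pipe_dto(0, 600000, 400000, true);  /* programmed */
        update_pipe_dto(1, 300000, 500000, false); /* a raise is always ok */
        return 0;
    }

The guard matters because dropping a pipe's divider while the old, more demanding state is still scanning out risks underflow; raising it early is harmless.
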
+ prev_dppclk_khz = clk_mgr->base.ctx->dc->current_state->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; + + if (safe_to_lower || prev_dppclk_khz < dppclk_khz) { + clk_mgr->dccg->funcs->update_dpp_dto( + clk_mgr->dccg, dpp_inst, dppclk_khz); + } } } @@ -240,7 +244,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) { if (dpp_clock_lowered) { // if clock is being lowered, increase DTO before lowering refclk - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); dcn20_update_clocks_update_dentist(clk_mgr); } else { // if clock is being raised, increase refclk before lowering DTO @@ -248,7 +252,7 @@ void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, dcn20_update_clocks_update_dentist(clk_mgr); // always update dtos unless clock is lowered and not safe to lower if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h index c9fd824f3c23..74ccd6c04134 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h @@ -34,7 +34,7 @@ void dcn2_update_clocks_fpga(struct clk_mgr *clk_mgr, struct dc_state *context, bool safe_to_lower); void dcn20_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, - struct dc_state *context); + struct dc_state *context, bool safe_to_lower); void dcn2_init_clocks(struct clk_mgr *clk_mgr); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 35c55e54eac0..5f683d118d2a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -149,6 +149,12 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, rn_vbios_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz); } + // workaround: limit dppclk to 100 MHz to avoid underflow on a lower-resolution eDP panel when an additional 4K monitor is connected.
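
The lines that follow implement that comment by clamping the requested dppclk to a floor before the should_set_clock() comparison runs. In isolation the clamp looks like this (constant taken from the hunk, function name invented):

    #include <stdio.h>

    #define DPPCLK_FLOOR_KHZ 100000 /* the 100 MHz floor from the hunk */

    /* Hypothetical request filter, applied before the clock manager
     * compares the request against the programmed value. */
    static int clamp_dppclk_khz(int requested_khz)
    {
        return requested_khz < DPPCLK_FLOOR_KHZ ? DPPCLK_FLOOR_KHZ
                                                : requested_khz;
    }

    int main(void)
    {
        printf("%d\n", clamp_dppclk_khz(65000));  /* -> 100000 */
        printf("%d\n", clamp_dppclk_khz(540000)); /* unchanged */
        return 0;
    }
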
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + if (new_clocks->dppclk_khz < 100000) + new_clocks->dppclk_khz = 100000; + } + if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) dpp_clock_lowered = true; @@ -164,16 +170,16 @@ void rn_update_clocks(struct clk_mgr *clk_mgr_base, } if (dpp_clock_lowered) { - // if clock is being lowered, increase DTO before lowering refclk - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + // increase per DPP DTO before lowering global dppclk + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); } else { - // if clock is being raised, increase refclk before lowering DTO + // increase global DPPCLK before lowering per DPP DTO if (update_dppclk || update_dispclk) rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); // always update dtos unless clock is lowered and not safe to lower if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } if (update_dispclk && diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 32f31bf91915..8904a85186aa 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2396,12 +2396,7 @@ void dc_set_power_state( enum dc_acpi_cm_power_state power_state) { struct kref refcount; - struct display_mode_lib *dml = kzalloc(sizeof(struct display_mode_lib), - GFP_KERNEL); - - ASSERT(dml); - if (!dml) - return; + struct display_mode_lib *dml; switch (power_state) { case DC_ACPI_CM_POWER_STATE_D0: @@ -2423,6 +2418,12 @@ void dc_set_power_state( * clean state, and dc hw programming optimizations will not * cause any trouble. 
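
The surrounding hunk moves the display_mode_lib allocation out of the function prologue and into the D3 branch, so the D0 path can no longer fail, or do throwaway work, on an allocation it never uses. A toy version of the snapshot-through-a-temporary pattern, with simplified types rather than the DC API:

    #include <stdlib.h>
    #include <string.h>

    struct display_mode_lib { char state[64]; };
    struct dc_state { struct display_mode_lib dml; };

    /* Illustrative only: allocate the temporary just-in-time, inside
     * the one branch that needs it. */
    static int reset_state_preserving_dml(struct dc_state *cur)
    {
        struct display_mode_lib *dml = malloc(sizeof(*dml));

        if (!dml)
            return -1;

        *dml = cur->dml;              /* snapshot before the wipe */
        memset(cur, 0, sizeof(*cur)); /* stand-in for the state destruct */
        cur->dml = *dml;              /* carry the snapshot over */
        free(dml);
        return 0;
    }

    int main(void)
    {
        struct dc_state s = { { "dml state" } };

        return reset_state_preserving_dml(&s) ? 1 : 0;
    }
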
*/ + dml = kzalloc(sizeof(struct display_mode_lib), + GFP_KERNEL); + + ASSERT(dml); + if (!dml) + return; /* Preserve refcount */ refcount = dc->current_state->refcount; @@ -2436,10 +2437,10 @@ void dc_set_power_state( dc->current_state->refcount = refcount; dc->current_state->bw_ctx.dml = *dml; + kfree(dml); + break; } - - kfree(dml); } void dc_resume(struct dc *dc) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 4619f94f0ac7..70846ae7d854 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -968,8 +968,7 @@ static bool dc_link_detect_helper(struct dc_link *link, same_edid = is_same_edid(&prev_sink->dc_edid, &sink->dc_edid); if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && - sink_caps.transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX && - reason != DETECT_REASON_HPDRX) { + sink_caps.transaction_type == DDC_TRANSACTION_TYPE_I2C_OVER_AUX) { /* * TODO debug why Dell 2413 doesn't like * two link trainings diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 0416a17b0897..320f4eeebf84 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -417,6 +417,8 @@ struct dc_debug_options { bool cm_in_bypass; #endif int force_clock_mode;/*every mode change.*/ + + bool nv12_iflip_vm_wa; }; struct dc_debug_data { diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c index 793c0cec407f..5fcffb29317e 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c @@ -398,7 +398,7 @@ static bool acquire( { enum gpio_result result; - if (!is_engine_available(engine)) + if ((engine == NULL) || !is_engine_available(engine)) return false; result = dal_ddc_open(ddc, GPIO_MODE_HARDWARE, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index a02c10e23e0d..d163388c99a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -840,8 +840,8 @@ static void hubbub1_det_request_size( hubbub1_get_blk256_size(&blk256_width, &blk256_height, bpe); - swath_bytes_horz_wc = height * blk256_height * bpe; - swath_bytes_vert_wc = width * blk256_width * bpe; + swath_bytes_horz_wc = width * blk256_height * bpe; + swath_bytes_vert_wc = height * blk256_width * bpe; *req128_horz_wc = (2 * swath_bytes_horz_wc <= detile_buf_size) ? 
false : /* full 256B request */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index ac8c18fadefc..448bc9b39942 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -493,7 +493,6 @@ static void dcn20_plane_atomic_disable(struct dc *dc, struct pipe_ctx *pipe_ctx) dpp->funcs->dpp_dppclk_control(dpp, false, false); hubp->power_gated = true; - dc->optimized_required = false; /* We're powering off, no need to optimize */ dc->hwss.plane_atomic_power_down(dc, pipe_ctx->plane_res.dpp, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c index 3b613fb93ef8..0162d3ffe268 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_optc.c @@ -233,12 +233,13 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c struct dc_crtc_timing *timing) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192 */ int mpcc_hactive = (timing->h_addressable + timing->h_border_left + timing->h_border_right) / opp_cnt; - int memory_mask = mpcc_hactive <= 2560 ? 0x3 : 0xf; + uint32_t memory_mask; uint32_t data_fmt = 0; + ASSERT(opp_cnt == 2); + /* TODO: In pseudocode but does not affect maximus, delete comment if we dont need on asic * REG_SET(OTG_GLOBAL_CONTROL2, 0, GLOBAL_UPDATE_LOCK_EN, 1); * Program OTG register MASTER_UPDATE_LOCK_DB_X/Y to the position before DP frame start @@ -246,9 +247,17 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c * MASTER_UPDATE_LOCK_DB_X, 160, * MASTER_UPDATE_LOCK_DB_Y, 240); */ + + /* 2 pieces of memory required for up to 5120 displays, 4 for up to 8192, + * however, for ODM combine we can simplify by always using 4. + * To make sure there's no overlap, each instance "reserves" 2 memories and + * they are uniquely combined here. 
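
The mask computed just below gives each OPP instance its own pair of memory pieces, so two combined instances can never claim the same memory. A standalone check of that bit layout (helper name invented):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Instance n owns bits 2n and 2n+1, so masks built this way are
     * disjoint for distinct instances. */
    static uint32_t odm_memory_mask(int opp0, int opp1)
    {
        return (0x3u << (opp0 * 2)) | (0x3u << (opp1 * 2));
    }

    int main(void)
    {
        assert(odm_memory_mask(0, 1) == 0x0f);
        assert(odm_memory_mask(1, 2) == 0x3c);
        printf("0x%x\n", odm_memory_mask(0, 1));
        return 0;
    }
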
+ */ + memory_mask = 0x3 << (opp_id[0] * 2) | 0x3 << (opp_id[1] * 2); + if (REG(OPTC_MEMORY_CONFIG)) REG_SET(OPTC_MEMORY_CONFIG, 0, - OPTC_MEM_SEL, memory_mask << (optc->inst * 4)); + OPTC_MEM_SEL, memory_mask); if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422) data_fmt = 1; @@ -257,7 +266,6 @@ void optc2_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_c REG_UPDATE(OPTC_DATA_FORMAT_CONTROL, OPTC_DATA_FORMAT, data_fmt); - ASSERT(opp_cnt == 2); REG_SET_3(OPTC_DATA_SOURCE_SELECT, 0, OPTC_NUM_OF_INPUT_SEGMENT, 1, OPTC_SEG0_SRC_SEL, opp_id[0], diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index 2f5a5867e674..1ddd6ae22155 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -164,6 +164,69 @@ static void hubp21_setup( } +void hubp21_set_viewport( + struct hubp *hubp, + const struct rect *viewport, + const struct rect *viewport_c) +{ + struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); + int patched_viewport_height = 0; + struct dc_debug_options *debug = &hubp->ctx->dc->debug; + + REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION, 0, + PRI_VIEWPORT_WIDTH, viewport->width, + PRI_VIEWPORT_HEIGHT, viewport->height); + + REG_SET_2(DCSURF_PRI_VIEWPORT_START, 0, + PRI_VIEWPORT_X_START, viewport->x, + PRI_VIEWPORT_Y_START, viewport->y); + + /*for stereo*/ + REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION, 0, + SEC_VIEWPORT_WIDTH, viewport->width, + SEC_VIEWPORT_HEIGHT, viewport->height); + + REG_SET_2(DCSURF_SEC_VIEWPORT_START, 0, + SEC_VIEWPORT_X_START, viewport->x, + SEC_VIEWPORT_Y_START, viewport->y); + + /* + * Work around for underflow issue with NV12 + rIOMMU translation + * + immediate flip. This will cause hubp underflow, but will not + * be user visible since underflow is in blank region + */ + patched_viewport_height = viewport_c->height; + if (viewport_c->height != 0 && debug->nv12_iflip_vm_wa) { + int pte_row_height = 0; + int pte_rows = 0; + + REG_GET(DCHUBP_REQ_SIZE_CONFIG, + PTE_ROW_HEIGHT_LINEAR, &pte_row_height); + + pte_row_height = 1 << (pte_row_height + 3); + pte_rows = (viewport_c->height + pte_row_height - 1) / pte_row_height; + patched_viewport_height = pte_rows * pte_row_height + 3; + } + + + /* DC supports NV12 only at the moment */ + REG_SET_2(DCSURF_PRI_VIEWPORT_DIMENSION_C, 0, + PRI_VIEWPORT_WIDTH_C, viewport_c->width, + PRI_VIEWPORT_HEIGHT_C, patched_viewport_height); + + REG_SET_2(DCSURF_PRI_VIEWPORT_START_C, 0, + PRI_VIEWPORT_X_START_C, viewport_c->x, + PRI_VIEWPORT_Y_START_C, viewport_c->y); + + REG_SET_2(DCSURF_SEC_VIEWPORT_DIMENSION_C, 0, + SEC_VIEWPORT_WIDTH_C, viewport_c->width, + SEC_VIEWPORT_HEIGHT_C, patched_viewport_height); + + REG_SET_2(DCSURF_SEC_VIEWPORT_START_C, 0, + SEC_VIEWPORT_X_START_C, viewport_c->x, + SEC_VIEWPORT_Y_START_C, viewport_c->y); +} + void hubp21_set_vm_system_aperture_settings(struct hubp *hubp, struct vm_system_aperture_param *apt) { @@ -211,7 +274,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_set_vm_system_aperture_settings = hubp21_set_vm_system_aperture_settings, .set_blank = hubp1_set_blank, .dcc_control = hubp1_dcc_control, - .mem_program_viewport = min_set_viewport, + .mem_program_viewport = hubp21_set_viewport, .set_cursor_attributes = hubp2_cursor_set_attributes, .set_cursor_position = hubp1_cursor_set_position, .hubp_clk_cntl = hubp1_clk_cntl, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 
b29b2c99a564..77741b18c85b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -57,6 +57,7 @@ #include "dcn20/dcn20_dccg.h" #include "dcn21_hubbub.h" #include "dcn10/dcn10_resource.h" +#include "dce110/dce110_resource.h" #include "dcn20/dcn20_dwb.h" #include "dcn20/dcn20_mmhubbub.h" @@ -847,6 +848,7 @@ static const struct dc_debug_options debug_defaults_drv = { .scl_reset_length10 = true, .sanity_checks = true, .disable_48mhz_pwrdwn = false, + .nv12_iflip_vm_wa = true }; static const struct dc_debug_options debug_defaults_diags = { @@ -866,6 +868,7 @@ static const struct dc_debug_options debug_defaults_diags = { enum dcn20_clk_src_array_id { DCN20_CLK_SRC_PLL0, DCN20_CLK_SRC_PLL1, + DCN20_CLK_SRC_PLL2, DCN20_CLK_SRC_TOTAL_DCN21 }; @@ -1351,12 +1354,6 @@ struct display_stream_compressor *dcn21_dsc_create( static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) { - /* - TODO: Fix this function to calcualte correct values. - There are known issues with this function currently - that will need to be investigated. Use hardcoded known good values for now. - - struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; int i; @@ -1371,11 +1368,10 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn2_1_soc.clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; dcn2_1_soc.clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; dcn2_1_soc.clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 16 / 1000; + dcn2_1_soc.clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; } - dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - i]; + dcn2_1_soc.clock_limits[i] = dcn2_1_soc.clock_limits[i - 1]; dcn2_1_soc.num_states = i; - */ } /* Temporary Place holder until we can get them from fuse */ @@ -1736,6 +1732,10 @@ static bool construct( dcn21_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL1, &clk_src_regs[1], false); + pool->base.clock_sources[DCN20_CLK_SRC_PLL2] = + dcn21_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs[2], false); pool->base.clk_src_count = DCN20_CLK_SRC_TOTAL_DCN21; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c index b953b02a1512..723af0b2dda0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_common_defs.c @@ -24,7 +24,7 @@ */ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_inline_defs.h" diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h index eca140da13d8..ded71ea82413 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_inline_defs.h @@ -27,7 +27,7 @@ #define __DML_INLINE_DEFS_H__ #include "dml_common_defs.h" -#include "../calcs/dcn_calc_math.h" +#include "dcn_calc_math.h" #include "dml_logger.h" static inline double dml_min(double a, double b) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h similarity index 100% rename from drivers/gpu/drm/amd/display/dc/calcs/dcn_calc_math.h rename to 
drivers/gpu/drm/amd/display/dc/inc/dcn_calc_math.h diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 5437b50e9f90..d9ea4ae690af 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -807,6 +807,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, 2 * in_out_vrr->min_refresh_in_uhz) in_out_vrr->btr.btr_enabled = false; + in_out_vrr->fixed.fixed_active = false; in_out_vrr->btr.btr_active = false; in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; @@ -826,6 +827,7 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync, in_out_vrr->adjust.v_total_max = stream->timing.v_total; } else if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && refresh_range >= MIN_REFRESH_RANGE_IN_US) { + in_out_vrr->adjust.v_total_min = calc_v_total_from_refresh(stream, in_out_vrr->max_refresh_in_uhz); diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h index b6f74bf4af02..27bb8c1ab858 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_12_0_offset.h @@ -7376,6 +7376,8 @@ #define mmCRTC4_CRTC_DRR_CONTROL 0x0f3e #define mmCRTC4_CRTC_DRR_CONTROL_BASE_IDX 2 +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0 0x395d +#define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX 2 // addressBlock: dce_dc_fmt4_dispdec // base address: 0x2000 diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index a23729d3174b..04fea3cc0cfa 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -21,6 +21,7 @@ */ #include +#include #include "pp_debug.h" #include "amdgpu.h" @@ -221,7 +222,7 @@ int smu_set_soft_freq_range(struct smu_context *smu, enum smu_clk_type clk_type, { int ret = 0; - if (min <= 0 && max <= 0) + if (min < 0 && max < 0) return -EINVAL; if (!smu_clk_dpm_is_enabled(smu, clk_type)) @@ -1125,6 +1126,23 @@ static int smu_smc_table_hw_init(struct smu_context *smu, ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false); if (ret) return ret; + + if (adev->asic_type == CHIP_NAVI10) { + if ((adev->pdev->device == 0x731f && (adev->pdev->revision == 0xc2 || + adev->pdev->revision == 0xc3 || + adev->pdev->revision == 0xca || + adev->pdev->revision == 0xcb)) || + (adev->pdev->device == 0x66af && (adev->pdev->revision == 0xf3 || + adev->pdev->revision == 0xf4 || + adev->pdev->revision == 0xf5 || + adev->pdev->revision == 0xf6))) { + ret = smu_disable_umc_cdr_12gbps_workaround(smu); + if (ret) { + pr_err("Workaround failed to disable UMC CDR feature on 12Gbps SKU!\n"); + return ret; + } + } + } } /* diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 1115761982a7..fed3fc4bb57a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1026,12 +1026,15 @@ static int smu10_get_clock_by_type_with_latency(struct pp_hwmgr *hwmgr, clocks->num_levels = 0; for (i = 0; i < pclk_vol_table->count; i++) { - clocks->data[i].clocks_in_khz = pclk_vol_table->entries[i].clk * 10; - clocks->data[i].latency_in_us = latency_required ? 
- smu10_get_mem_latency(hwmgr, - pclk_vol_table->entries[i].clk) : - 0; - clocks->num_levels++; + if (pclk_vol_table->entries[i].clk) { + clocks->data[clocks->num_levels].clocks_in_khz = + pclk_vol_table->entries[i].clk * 10; + clocks->data[clocks->num_levels].latency_in_us = latency_required ? + smu10_get_mem_latency(hwmgr, + pclk_vol_table->entries[i].clk) : + 0; + clocks->num_levels++; + } } return 0; @@ -1077,9 +1080,11 @@ static int smu10_get_clock_by_type_with_voltage(struct pp_hwmgr *hwmgr, clocks->num_levels = 0; for (i = 0; i < pclk_vol_table->count; i++) { - clocks->data[i].clocks_in_khz = pclk_vol_table->entries[i].clk * 10; - clocks->data[i].voltage_in_mv = pclk_vol_table->entries[i].vol; - clocks->num_levels++; + if (pclk_vol_table->entries[i].clk) { + clocks->data[clocks->num_levels].clocks_in_khz = pclk_vol_table->entries[i].clk * 10; + clocks->data[clocks->num_levels].voltage_in_mv = pclk_vol_table->entries[i].vol; + clocks->num_levels++; + } } return 0; diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 41fce75b263f..d005dea4a3bf 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -263,6 +263,7 @@ struct smu_table_context uint8_t thermal_controller_type; void *overdrive_table; + void *boot_overdrive_table; }; struct smu_dpm_context { @@ -550,6 +551,7 @@ struct pptable_funcs { int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int (*override_pcie_parameters)(struct smu_context *smu); uint32_t (*get_pptable_power_limit)(struct smu_context *smu); + int (*disable_umc_cdr_12gbps_workaround)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h index d8c9b7f91fcc..a5b4df146713 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_types.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_types.h @@ -170,6 +170,8 @@ __SMU_DUMMY_MAP(SetSoftMinJpeg), \ __SMU_DUMMY_MAP(SetHardMinFclkByFreq), \ __SMU_DUMMY_MAP(DFCstateControl), \ + __SMU_DUMMY_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE), \ + __SMU_DUMMY_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE), \ #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h index 373861ddccd0..406bfd187ce8 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_ppsmc.h @@ -120,7 +120,10 @@ #define PPSMC_MSG_GetVoltageByDpmOverdrive 0x45 #define PPSMC_MSG_BacoAudioD3PME 0x48 -#define PPSMC_Message_Count 0x49 +#define PPSMC_MSG_DALDisableDummyPstateChange 0x49 +#define PPSMC_MSG_DALEnableDummyPstateChange 0x4A + +#define PPSMC_Message_Count 0x4B typedef uint32_t PPSMC_Result; typedef uint32_t PPSMC_Msg; diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h index b2f96a101124..7a63cf8e85ed 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -39,21 +39,39 @@ #define SMU_11_0_PP_OVERDRIVE_VERSION 0x0800 #define SMU_11_0_PP_POWERSAVINGCLOCK_VERSION 0x0100 +enum SMU_11_0_ODFEATURE_CAP { + SMU_11_0_ODCAP_GFXCLK_LIMITS = 0, + SMU_11_0_ODCAP_GFXCLK_CURVE, + SMU_11_0_ODCAP_UCLK_MAX, + SMU_11_0_ODCAP_POWER_LIMIT, + 
SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, + SMU_11_0_ODCAP_FAN_SPEED_MIN, + SMU_11_0_ODCAP_TEMPERATURE_FAN, + SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, + SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, + SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, + SMU_11_0_ODCAP_AUTO_UV_ENGINE, + SMU_11_0_ODCAP_AUTO_OC_ENGINE, + SMU_11_0_ODCAP_AUTO_OC_MEMORY, + SMU_11_0_ODCAP_FAN_CURVE, + SMU_11_0_ODCAP_COUNT, +}; + enum SMU_11_0_ODFEATURE_ID { - SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << 0, //GFXCLK Limit feature - SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << 1, //GFXCLK Curve feature - SMU_11_0_ODFEATURE_UCLK_MAX = 1 << 2, //UCLK Limit feature - SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << 3, //Power Limit feature - SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << 4, //Fan Acoustic RPM feature - SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << 5, //Minimum Fan Speed feature - SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << 6, //Fan Target Temperature Limit feature - SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << 7, //Operating Temperature Limit feature - SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << 8, //AC Timing Tuning feature - SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << 9, //Zero RPM feature - SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << 10, //Auto Under Volt GFXCLK feature - SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << 11, //Auto Over Clock GFXCLK feature - SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << 12, //Auto Over Clock MCLK feature - SMU_11_0_ODFEATURE_FAN_CURVE = 1 << 13, //VICTOR TODO + SMU_11_0_ODFEATURE_GFXCLK_LIMITS = 1 << SMU_11_0_ODCAP_GFXCLK_LIMITS, //GFXCLK Limit feature + SMU_11_0_ODFEATURE_GFXCLK_CURVE = 1 << SMU_11_0_ODCAP_GFXCLK_CURVE, //GFXCLK Curve feature + SMU_11_0_ODFEATURE_UCLK_MAX = 1 << SMU_11_0_ODCAP_UCLK_MAX, //UCLK Limit feature + SMU_11_0_ODFEATURE_POWER_LIMIT = 1 << SMU_11_0_ODCAP_POWER_LIMIT, //Power Limit feature + SMU_11_0_ODFEATURE_FAN_ACOUSTIC_LIMIT = 1 << SMU_11_0_ODCAP_FAN_ACOUSTIC_LIMIT, //Fan Acoustic RPM feature + SMU_11_0_ODFEATURE_FAN_SPEED_MIN = 1 << SMU_11_0_ODCAP_FAN_SPEED_MIN, //Minimum Fan Speed feature + SMU_11_0_ODFEATURE_TEMPERATURE_FAN = 1 << SMU_11_0_ODCAP_TEMPERATURE_FAN, //Fan Target Temperature Limit feature + SMU_11_0_ODFEATURE_TEMPERATURE_SYSTEM = 1 << SMU_11_0_ODCAP_TEMPERATURE_SYSTEM, //Operating Temperature Limit feature + SMU_11_0_ODFEATURE_MEMORY_TIMING_TUNE = 1 << SMU_11_0_ODCAP_MEMORY_TIMING_TUNE, //AC Timing Tuning feature + SMU_11_0_ODFEATURE_FAN_ZERO_RPM_CONTROL = 1 << SMU_11_0_ODCAP_FAN_ZERO_RPM_CONTROL, //Zero RPM feature + SMU_11_0_ODFEATURE_AUTO_UV_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_UV_ENGINE, //Auto Under Volt GFXCLK feature + SMU_11_0_ODFEATURE_AUTO_OC_ENGINE = 1 << SMU_11_0_ODCAP_AUTO_OC_ENGINE, //Auto Over Clock GFXCLK feature + SMU_11_0_ODFEATURE_AUTO_OC_MEMORY = 1 << SMU_11_0_ODCAP_AUTO_OC_MEMORY, //Auto Over Clock MCLK feature + SMU_11_0_ODFEATURE_FAN_CURVE = 1 << SMU_11_0_ODCAP_FAN_CURVE, //Fan Curve feature SMU_11_0_ODFEATURE_COUNT = 14, }; #define SMU_11_0_MAX_ODFEATURE 32 //Maximum Number of OD Features diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index ca62e92e5a4f..2cf81cafc669 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -119,6 +119,10 @@ static struct smu_11_0_cmn2aisc_mapping navi10_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PowerDownJpeg, PPSMC_MSG_PowerDownJpeg), MSG_MAP(BacoAudioD3PME, PPSMC_MSG_BacoAudioD3PME), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3), + MSG_MAP(DAL_DISABLE_DUMMY_PSTATE_CHANGE,PPSMC_MSG_DALDisableDummyPstateChange), + MSG_MAP(DAL_ENABLE_DUMMY_PSTATE_CHANGE, 
PPSMC_MSG_DALEnableDummyPstateChange), + MSG_MAP(GetVoltageByDpm, PPSMC_MSG_GetVoltageByDpm), + MSG_MAP(GetVoltageByDpmOverdrive, PPSMC_MSG_GetVoltageByDpmOverdrive), }; static struct smu_11_0_cmn2aisc_mapping navi10_clk_map[SMU_CLK_COUNT] = { @@ -701,11 +705,20 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu return dpm_desc->SnapToDiscrete == 0 ? true : false; } -static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature) +static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_CAP cap) { - return od_table->cap[feature]; + return od_table->cap[cap]; } +static void navi10_od_setting_get_range(struct smu_11_0_overdrive_table *od_table, + enum SMU_11_0_ODSETTING_ID setting, + uint32_t *min, uint32_t *max) +{ + if (min) + *min = od_table->min[setting]; + if (max) + *max = od_table->max[setting]; +} static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) @@ -724,6 +737,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, OverDriveTable_t *od_table = (OverDriveTable_t *)table_context->overdrive_table; struct smu_11_0_overdrive_table *od_settings = smu->od_settings; + uint32_t min_value, max_value; switch (clk_type) { case SMU_GFXCLK: @@ -801,7 +815,7 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_SCLK: if (!smu->od_enabled || !od_table || !od_settings) break; - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) break; size += sprintf(buf + size, "OD_SCLK:\n"); size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); @@ -809,15 +823,15 @@ static int navi10_print_clk_levels(struct smu_context *smu, case SMU_OD_MCLK: if (!smu->od_enabled || !od_table || !od_settings) break; - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) break; size += sprintf(buf + size, "OD_MCLK:\n"); - size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax); + size += sprintf(buf + size, "1: %uMHz\n", od_table->UclkFmax); break; case SMU_OD_VDDC_CURVE: if (!smu->od_enabled || !od_table || !od_settings) break; - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) break; size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); for (i = 0; i < 3; i++) { @@ -836,6 +850,55 @@ static int navi10_print_clk_levels(struct smu_context *smu, } size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); } + break; + case SMU_OD_RANGE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + size = sprintf(buf, "%s:\n", "OD_RANGE"); + + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMIN, + &min_value, NULL); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_GFXCLKFMAX, + NULL, &max_value); + size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) { + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, + &min_value, &max_value); + size 
+= sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) { + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n", + min_value, max_value); + navi10_od_setting_get_range(od_settings, SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3, + &min_value, &max_value); + size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n", + min_value, max_value); + } + break; default: break; @@ -1780,6 +1843,28 @@ static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_tab return 0; } +static int navi10_overdrive_get_gfx_clk_base_voltage(struct smu_context *smu, + uint16_t *voltage, + uint32_t freq) +{ + uint32_t param = (freq & 0xFFFF) | (PPCLK_GFXCLK << 16); + uint32_t value = 0; + int ret; + + ret = smu_send_smc_msg_with_param(smu, + SMU_MSG_GetVoltageByDpm, + param); + if (ret) { + pr_err("[GetBaseVoltage] failed to get GFXCLK AVFS voltage from SMU!"); + return ret; + } + + smu_read_smc_arg(smu, &value); + *voltage = (uint16_t)value; + + return 0; +} + static int navi10_setup_od_limits(struct smu_context *smu) { struct smu_11_0_overdrive_table *overdrive_table = NULL; struct smu_11_0_powerplay_table *powerplay_table = NULL; @@ -1806,16 +1891,40 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali if (ret) return ret; + od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; if (initialize) { ret = navi10_setup_od_limits(smu); if (ret) { pr_err("Failed to retrieve board OD limits\n"); return ret; } + if (od_table) { + if (!od_table->GfxclkVolt1) { + ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, + &od_table->GfxclkVolt1, + od_table->GfxclkFreq1); + if (ret) + od_table->GfxclkVolt1 = 0; + } + + if (!od_table->GfxclkVolt2) { + ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, + &od_table->GfxclkVolt2, + od_table->GfxclkFreq2); + if (ret) + od_table->GfxclkVolt2 = 0; + } + if (!od_table->GfxclkVolt3) { + ret = navi10_overdrive_get_gfx_clk_base_voltage(smu, + &od_table->GfxclkVolt3, + od_table->GfxclkFreq3); + if (ret) + od_table->GfxclkVolt3 = 0; + } + } } - od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; if (od_table) { navi10_dump_od_table(od_table); } @@ -1847,7 +1956,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL switch (type) { case PP_OD_EDIT_SCLK_VDDC_TABLE: - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_LIMITS)) { pr_warn("GFXCLK_LIMITS not 
supported!\n"); return -ENOTSUPP; } @@ -1893,7 +2002,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL } break; case PP_OD_EDIT_MCLK_VDDC_TABLE: - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_UCLK_MAX)) { pr_warn("UCLK_MAX not supported!\n"); return -ENOTSUPP; } @@ -1911,6 +2020,13 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL return ret; od_table->UclkFmax = input[1]; break; + case PP_OD_RESTORE_DEFAULT_TABLE: + if (!(table_context->overdrive_table && table_context->boot_overdrive_table)) { + pr_err("Overdrive table was not initialized!\n"); + return -EINVAL; + } + memcpy(table_context->overdrive_table, table_context->boot_overdrive_table, sizeof(OverDriveTable_t)); + break; case PP_OD_COMMIT_DPM_TABLE: navi10_dump_od_table(od_table); ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); @@ -1927,7 +2043,7 @@ static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABL } break; case PP_OD_EDIT_VDDC_CURVE: - if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODCAP_GFXCLK_CURVE)) { pr_warn("GFXCLK_CURVE not supported!\n"); return -ENOTSUPP; } @@ -2000,6 +2116,61 @@ static int navi10_run_btc(struct smu_context *smu) return ret; } +static int navi10_dummy_pstate_control(struct smu_context *smu, bool enable) +{ + int result = 0; + + if (!enable) + result = smu_send_smc_msg(smu, SMU_MSG_DAL_DISABLE_DUMMY_PSTATE_CHANGE); + else + result = smu_send_smc_msg(smu, SMU_MSG_DAL_ENABLE_DUMMY_PSTATE_CHANGE); + + return result; +} + +static int navi10_disable_umc_cdr_12gbps_workaround(struct smu_context *smu) +{ + uint32_t uclk_count, uclk_min, uclk_max; + uint32_t smu_version; + int ret = 0; + + ret = smu_get_smc_version(smu, NULL, &smu_version); + if (ret) + return ret; + + /* This workaround is available only for 42.50 or later SMC firmwares */ + if (smu_version < 0x2A3200) + return 0; + + ret = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_count); + if (ret) + return ret; + + ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)0, &uclk_min); + if (ret) + return ret; + + ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, (uint16_t)(uclk_count - 1), &uclk_max); + if (ret) + return ret; + + /* Force UCLK out of the highest DPM */ + ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, uclk_min); + if (ret) + return ret; + + /* Revert the UCLK Hardmax */ + ret = smu_set_hard_freq_range(smu, SMU_UCLK, 0, uclk_max); + if (ret) + return ret; + + /* + * In this case, SMU already disabled dummy pstate during enablement + * of UCLK DPM, we have to re-enable it.
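
navi10_disable_umc_cdr_12gbps_workaround() above bounces the UCLK hard limit: it pins the clock to the lowest DPM level to force it out of the highest state, then restores the full range. A sketch of that sequence, with a stand-in for smu_set_hard_freq_range():

    #include <stdio.h>

    /* Stand-in for smu_set_hard_freq_range(smu, SMU_UCLK, min, max). */
    static int set_hard_range(unsigned int min_mhz, unsigned int max_mhz)
    {
        printf("UCLK hard range: %u..%u MHz\n", min_mhz, max_mhz);
        return 0;
    }

    /* Pin to the lowest level, then revert the hardmax. */
    static int bounce_uclk(unsigned int uclk_min, unsigned int uclk_max)
    {
        int ret = set_hard_range(0, uclk_min); /* force out of highest DPM */

        if (ret)
            return ret;
        return set_hard_range(0, uclk_max);    /* revert the hardmax */
    }

    int main(void)
    {
        return bounce_uclk(625, 875);
    }
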
+ * */ + return navi10_dummy_pstate_control(smu, true); +} + static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, .alloc_dpm_context = navi10_allocate_dpm_context, @@ -2091,6 +2262,7 @@ static const struct pptable_funcs navi10_ppt_funcs = { .od_edit_dpm_table = navi10_od_edit_dpm_table, .get_pptable_power_limit = navi10_get_pptable_power_limit, .run_btc = navi10_run_btc, + .disable_umc_cdr_12gbps_workaround = navi10_disable_umc_cdr_12gbps_workaround, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/smu_internal.h b/drivers/gpu/drm/amd/powerplay/smu_internal.h index 8872f8b2d502..593271685a98 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_internal.h +++ b/drivers/gpu/drm/amd/powerplay/smu_internal.h @@ -201,4 +201,7 @@ int smu_send_smc_msg(struct smu_context *smu, enum smu_message_type msg); #define smu_update_pcie_parameters(smu, pcie_gen_cap, pcie_width_cap) \ ((smu)->ppt_funcs->update_pcie_parameters ? (smu)->ppt_funcs->update_pcie_parameters((smu), (pcie_gen_cap), (pcie_width_cap)) : 0) +#define smu_disable_umc_cdr_12gbps_workaround(smu) \ + ((smu)->ppt_funcs->disable_umc_cdr_12gbps_workaround ? (smu)->ppt_funcs->disable_umc_cdr_12gbps_workaround((smu)) : 0) + #endif diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index e4268a627eff..e4149e6b68b3 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -948,8 +948,12 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) struct smu_11_0_max_sustainable_clocks *max_sustainable_clocks; int ret = 0; - max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), + if (!smu->smu_table.max_sustainable_clocks) + max_sustainable_clocks = kzalloc(sizeof(struct smu_11_0_max_sustainable_clocks), GFP_KERNEL); + else + max_sustainable_clocks = smu->smu_table.max_sustainable_clocks; + smu->smu_table.max_sustainable_clocks = (void *)max_sustainable_clocks; max_sustainable_clocks->uclock = smu->smu_table.boot_values.uclk / 100; @@ -1807,6 +1811,12 @@ int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, pr_err("Failed to export overdrive table!\n"); return ret; } + if (!table_context->boot_overdrive_table) { + table_context->boot_overdrive_table = kmemdup(table_context->overdrive_table, overdrive_table_size, GFP_KERNEL); + if (!table_context->boot_overdrive_table) { + return -ENOMEM; + } + } } ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true); if (ret) { diff --git a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c index 094cfc46adac..29c11694406d 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v12_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v12_0.c @@ -373,9 +373,6 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ { int ret = 0; - if (max < min) - return -EINVAL; - switch (clk_type) { case SMU_GFXCLK: case SMU_SCLK: diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 0d3a3b0a934e..a9c41cd111ce 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -1702,22 +1702,11 @@ static int vega20_set_default_od_settings(struct smu_context *smu, struct smu_table_context *table_context = &smu->smu_table; int ret; - if (initialize) { - if (table_context->overdrive_table) - return -EINVAL; - - 
table_context->overdrive_table = kzalloc(sizeof(OverDriveTable_t), GFP_KERNEL); - - if (!table_context->overdrive_table) - return -ENOMEM; - - ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, - table_context->overdrive_table, false); - if (ret) { - pr_err("Failed to export over drive table!\n"); - return ret; - } + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); + if (ret) + return ret; + if (initialize) { ret = vega20_set_default_od8_setttings(smu); if (ret) return ret; @@ -2774,12 +2763,11 @@ static int vega20_odn_edit_dpm_table(struct smu_context *smu, break; case PP_OD_RESTORE_DEFAULT_TABLE: - ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false); - if (ret) { - pr_err("Failed to export over drive table!\n"); - return ret; + if (!(table_context->overdrive_table && table_context->boot_overdrive_table)) { + pr_err("Overdrive table was not initialized!\n"); + return -EINVAL; } - + memcpy(table_context->overdrive_table, table_context->boot_overdrive_table, sizeof(OverDriveTable_t)); break; case PP_OD_COMMIT_DPM_TABLE: diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index f2e73e6d46b8..10985134ce0b 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -73,7 +73,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) unsigned long prate; unsigned int mask = ATMEL_HLCDC_CLKDIV_MASK | ATMEL_HLCDC_CLKPOL; unsigned int cfg = 0; - int div; + int div, ret; + + ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk); + if (ret) + return; vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; @@ -95,14 +99,14 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) (adj->crtc_hdisplay - 1) | ((adj->crtc_vdisplay - 1) << 16)); + prate = clk_get_rate(crtc->dc->hlcdc->sys_clk); + mode_rate = adj->crtc_clock * 1000; if (!crtc->dc->desc->fixed_clksrc) { + prate *= 2; cfg |= ATMEL_HLCDC_CLKSEL; mask |= ATMEL_HLCDC_CLKSEL; } - prate = 2 * clk_get_rate(crtc->dc->hlcdc->sys_clk); - mode_rate = adj->crtc_clock * 1000; - div = DIV_ROUND_UP(prate, mode_rate); if (div < 2) { div = 2; @@ -117,8 +121,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) int div_low = prate / mode_rate; if (div_low >= 2 && - ((prate / div_low - mode_rate) < - 10 * (mode_rate - prate / div))) + (10 * (prate / div_low - mode_rate) < + (mode_rate - prate / div))) /* * At least 10 times better when using a higher * frequency than requested, instead of a lower. 
@@ -147,6 +151,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO | ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK, cfg); + + clk_disable_unprepare(crtc->dc->hlcdc->sys_clk); } static enum drm_mode_status diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 67fca439bbfb..24965e53d351 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1624,28 +1624,34 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode) frame.colorspace = HDMI_COLORSPACE_RGB; /* Set up colorimetry */ - switch (hdmi->hdmi_data.enc_out_encoding) { - case V4L2_YCBCR_ENC_601: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else + if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { + switch (hdmi->hdmi_data.enc_out_encoding) { + case V4L2_YCBCR_ENC_601: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + case V4L2_YCBCR_ENC_709: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_709; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; + break; + default: /* Carries no data */ frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + } + } else { + frame.colorimetry = HDMI_COLORIMETRY_NONE; frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; - case V4L2_YCBCR_ENC_709: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else - frame.colorimetry = HDMI_COLORIMETRY_ITU_709; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; - break; - default: /* Carries no data */ - frame.colorimetry = HDMI_COLORIMETRY_ITU_601; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; } frame.scan_mode = HDMI_SCAN_MODE_NONE; diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 8029478ffebb..b0b0ccbb059d 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -297,7 +297,7 @@ static inline int tc_poll_timeout(struct tc_data *tc, unsigned int addr, static int tc_aux_wait_busy(struct tc_data *tc) { - return tc_poll_timeout(tc, DP0_AUXSTATUS, AUX_BUSY, 0, 1000, 100000); + return tc_poll_timeout(tc, DP0_AUXSTATUS, AUX_BUSY, 0, 100, 100000); } static int tc_aux_write_data(struct tc_data *tc, const void *data, @@ -640,7 +640,7 @@ static int tc_aux_link_setup(struct tc_data *tc) if (ret) goto err; - ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1, 1000); + ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 100, 100000); if (ret == -ETIMEDOUT) { dev_err(tc->dev, "Timeout waiting for PHY to become ready"); return ret; @@ -876,7 +876,7 @@ static int tc_wait_link_training(struct tc_data *tc) int ret; ret = tc_poll_timeout(tc, DP0_LTSTAT, LT_LOOPDONE, - LT_LOOPDONE, 1, 1000); + LT_LOOPDONE, 500, 100000); if (ret) { dev_err(tc->dev, "Link training timeout waiting for LT_LOOPDONE!\n"); return ret; @@ -949,7 +949,7 @@ static int 
tc_main_link_enable(struct tc_data *tc) dp_phy_ctrl &= ~(DP_PHY_RST | PHY_M1_RST | PHY_M0_RST); ret = regmap_write(tc->regmap, DP_PHY_CTRL, dp_phy_ctrl); - ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1, 1000); + ret = tc_poll_timeout(tc, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 500, 100000); if (ret) { dev_err(dev, "timeout waiting for phy become ready"); return ret; diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 895b73f23079..29367b6506a8 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -879,7 +879,8 @@ bool drm_client_rotation(struct drm_mode_set *modeset, unsigned int *rotation) * depending on the hardware this may require the framebuffer * to be in a specific tiling format. */ - if ((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180 || + if (((*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_0 && + (*rotation & DRM_MODE_ROTATE_MASK) != DRM_MODE_ROTATE_180) || !plane->rotation_property) return false; diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c index ca3c55c6b815..2ece2957da1a 100644 --- a/drivers/gpu/drm/drm_debugfs_crc.c +++ b/drivers/gpu/drm/drm_debugfs_crc.c @@ -140,8 +140,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf, if (IS_ERR(source)) return PTR_ERR(source); - if (source[len] == '\n') - source[len] = '\0'; + if (source[len - 1] == '\n') + source[len - 1] = '\0'; ret = crtc->funcs->verify_crc_source(crtc, source, &values_cnt); if (ret) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index e6afe4faeca6..4a65ef8d8bff 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -517,8 +517,10 @@ drm_dp_decode_sideband_req(const struct drm_dp_sideband_msg_tx *raw, } if (failed) { - for (i = 0; i < r->num_transactions; i++) + for (i = 0; i < r->num_transactions; i++) { + tx = &r->transactions[i]; kfree(tx->bytes); + } return -ENOMEM; } @@ -3435,6 +3437,7 @@ static int drm_dp_get_vc_payload_bw(u8 dp_link_bw, u8 dp_link_count) int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool mst_state) { int ret = 0; + int i = 0; struct drm_dp_mst_branch *mstb = NULL; mutex_lock(&mgr->lock); @@ -3495,10 +3498,21 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms /* this can fail if the device is gone */ drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, 0); ret = 0; + mutex_lock(&mgr->payload_lock); memset(mgr->payloads, 0, mgr->max_payloads * sizeof(struct drm_dp_payload)); mgr->payload_mask = 0; set_bit(0, &mgr->payload_mask); + for (i = 0; i < mgr->max_payloads; i++) { + struct drm_dp_vcpi *vcpi = mgr->proposed_vcpis[i]; + + if (vcpi) { + vcpi->vcpi = 0; + vcpi->num_slots = 0; + } + mgr->proposed_vcpis[i] = NULL; + } mgr->vcpi_mask = 0; + mutex_unlock(&mgr->payload_lock); } out_unlock: @@ -3760,7 +3774,8 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY) guid = msg->u.resource_stat.guid; - mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); + if (guid) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); } else { mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad); } diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 0810d3ef6961..6c35407a50eb 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -254,11 +254,16 
@@ static void *drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem) if (ret) goto err_zero_use; - if (obj->import_attach) + if (obj->import_attach) { shmem->vaddr = dma_buf_vmap(obj->import_attach->dmabuf); - else + } else { + pgprot_t prot = PAGE_KERNEL; + + if (!shmem->map_cached) + prot = pgprot_writecombine(prot); shmem->vaddr = vmap(shmem->pages, obj->size >> PAGE_SHIFT, - VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + VM_MAP, prot); + } if (!shmem->vaddr) { DRM_DEBUG_KMS("Failed to vmap pages\n"); @@ -537,7 +542,9 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) } vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND; - vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + if (!shmem->map_cached) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); vma->vm_ops = &drm_gem_shmem_vm_ops; diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index b481cafdde28..825abe38201a 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -542,10 +542,12 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, } DRM_DEBUG_LEASE("Creating lease\n"); + /* lessee will take the ownership of leases */ lessee = drm_lease_create(lessor, &leases); if (IS_ERR(lessee)) { ret = PTR_ERR(lessee); + idr_destroy(&leases); goto out_leases; } @@ -580,7 +582,6 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, out_leases: put_unused_fd(fd); - idr_destroy(&leases); DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret); return ret; diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index e34058c721be..16bff1be4b8a 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -367,9 +367,9 @@ static void mipi_dbi_blank(struct mipi_dbi_dev *dbidev) memset(dbidev->tx_buf, 0, len); mipi_dbi_command(dbi, MIPI_DCS_SET_COLUMN_ADDRESS, 0, 0, - (width >> 8) & 0xFF, (width - 1) & 0xFF); + ((width - 1) >> 8) & 0xFF, (width - 1) & 0xFF); mipi_dbi_command(dbi, MIPI_DCS_SET_PAGE_ADDRESS, 0, 0, - (height >> 8) & 0xFF, (height - 1) & 0xFF); + ((height - 1) >> 8) & 0xFF, (height - 1) & 0xFF); mipi_dbi_command_buf(dbi, MIPI_DCS_WRITE_MEMORY_START, (u8 *)dbidev->tx_buf, len); diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 88232698d7a0..3fd35e6b9d53 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1672,6 +1672,13 @@ static int drm_mode_parse_cmdline_options(char *str, size_t len, } } + if (!(rotation & DRM_MODE_ROTATE_MASK)) + rotation |= DRM_MODE_ROTATE_0; + + /* Make sure there is exactly one rotation defined */ + if (!is_power_of_2(rotation & DRM_MODE_ROTATE_MASK)) + return -EINVAL; + mode->rotation_reflection = rotation; return 0; diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index a86a3ab2771c..235729f4aadb 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -51,8 +51,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align) { drm_dma_handle_t *dmah; - unsigned long addr; - size_t sz; /* pci_alloc_consistent only guarantees alignment to the smallest * PAGE_SIZE order which is greater than or equal to the requested size. 
@@ -68,20 +66,13 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali dmah->size = size; dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, - GFP_KERNEL | __GFP_COMP); + GFP_KERNEL); if (dmah->vaddr == NULL) { kfree(dmah); return NULL; } - /* XXX - Is virt_to_page() legal for consistent mem? */ - /* Reserve */ - for (addr = (unsigned long)dmah->vaddr, sz = size; - sz > 0; addr += PAGE_SIZE, sz -= PAGE_SIZE) { - SetPageReserved(virt_to_page((void *)addr)); - } - return dmah; } @@ -94,19 +85,9 @@ EXPORT_SYMBOL(drm_pci_alloc); */ void __drm_legacy_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah) { - unsigned long addr; - size_t sz; - - if (dmah->vaddr) { - /* XXX - Is virt_to_page() legal for consistent mem? */ - /* Unreserve */ - for (addr = (unsigned long)dmah->vaddr, sz = dmah->size; - sz > 0; addr += PAGE_SIZE, sz -= PAGE_SIZE) { - ClearPageReserved(virt_to_page((void *)addr)); - } + if (dmah->vaddr) dma_free_coherent(&dev->pdev->dev, dmah->size, dmah->vaddr, dmah->busaddr); - } } /** diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index b8363aaa9032..818738e83d06 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -54,7 +54,12 @@ EXPORT_SYMBOL(drm_rect_intersect); static u32 clip_scaled(u32 src, u32 dst, u32 clip) { - u64 tmp = mul_u32_u32(src, dst - clip); + u64 tmp; + + if (dst == 0) + return 0; + + tmp = mul_u32_u32(src, dst - clip); /* * Round toward 1.0 when clipping so that we don't accidentally diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 72726f2c7a9f..0f6497670e29 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1751,8 +1751,9 @@ static int exynos_dsi_probe(struct platform_device *pdev) ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(dsi->supplies), dsi->supplies); if (ret) { - dev_info(dev, "failed to get regulators: %d\n", ret); - return -EPROBE_DEFER; + if (ret != -EPROBE_DEFER) + dev_info(dev, "failed to get regulators: %d\n", ret); + return ret; } dsi->clks = devm_kcalloc(dev, @@ -1765,9 +1766,10 @@ static int exynos_dsi_probe(struct platform_device *pdev) dsi->clks[i] = devm_clk_get(dev, clk_names[i]); if (IS_ERR(dsi->clks[i])) { if (strcmp(clk_names[i], "sclk_mipi") == 0) { - strcpy(clk_names[i], OLD_SCLK_MIPI_CLK_NAME); - i--; - continue; + dsi->clks[i] = devm_clk_get(dev, + OLD_SCLK_MIPI_CLK_NAME); + if (!IS_ERR(dsi->clks[i])) + continue; } dev_info(dev, "failed to get the clock: %s\n", diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 48159d5d2214..d85e15e816e9 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1803,18 +1803,10 @@ static int hdmi_resources_init(struct hdmi_context *hdata) hdata->reg_hdmi_en = devm_regulator_get_optional(dev, "hdmi-en"); - if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV) { + if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV) if (IS_ERR(hdata->reg_hdmi_en)) return PTR_ERR(hdata->reg_hdmi_en); - ret = regulator_enable(hdata->reg_hdmi_en); - if (ret) { - DRM_DEV_ERROR(dev, - "failed to enable hdmi-en regulator\n"); - return ret; - } - } - return hdmi_bridge_init(hdata); } @@ -2021,6 +2013,15 @@ static int hdmi_probe(struct platform_device *pdev) } } + if (!IS_ERR(hdata->reg_hdmi_en)) { + ret = regulator_enable(hdata->reg_hdmi_en); + if (ret) { + DRM_DEV_ERROR(dev, + "failed to enable hdmi-en regulator\n"); + goto err_hdmiphy; + } + } + 
pm_runtime_enable(dev); audio_infoframe = &hdata->audio.infoframe; @@ -2045,7 +2046,8 @@ static int hdmi_probe(struct platform_device *pdev) err_rpm_disable: pm_runtime_disable(dev); - + if (!IS_ERR(hdata->reg_hdmi_en)) + regulator_disable(hdata->reg_hdmi_en); err_hdmiphy: if (hdata->hdmiphy_port) put_device(&hdata->hdmiphy_port->dev); diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 218f3bb15276..90237abee088 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -462,6 +462,7 @@ static int psbfb_probe(struct drm_fb_helper *helper, container_of(helper, struct psb_fbdev, psb_fb_helper); struct drm_device *dev = psb_fbdev->psb_fb_helper.dev; struct drm_psb_private *dev_priv = dev->dev_private; + unsigned int fb_size; int bytespp; bytespp = sizes->surface_bpp / 8; @@ -471,8 +472,11 @@ static int psbfb_probe(struct drm_fb_helper *helper, /* If the mode will not fit in 32bit then switch to 16bit to get a console on full resolution. The X mode setting server will allocate its own 32bit GEM framebuffer */ - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height > - dev_priv->vram_stolen_size) { + fb_size = ALIGN(sizes->surface_width * bytespp, 64) * + sizes->surface_height; + fb_size = ALIGN(fb_size, PAGE_SIZE); + + if (fb_size > dev_priv->vram_stolen_size) { sizes->surface_bpp = 16; sizes->surface_depth = 16; } diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h index 0da860200410..e2ac09894a6d 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_ade_reg.h @@ -83,7 +83,6 @@ #define VSIZE_OFST 20 #define LDI_INT_EN 0x741C #define FRAME_END_INT_EN_OFST 1 -#define UNDERFLOW_INT_EN_OFST 2 #define LDI_CTRL 0x7420 #define BPP_OFST 3 #define DATA_GATE_EN BIT(2) diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 73cd28a6ea07..86000127d4ee 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -46,7 +46,6 @@ struct ade_hw_ctx { struct clk *media_noc_clk; struct clk *ade_pix_clk; struct reset_control *reset; - struct work_struct display_reset_wq; bool power_on; int irq; @@ -136,7 +135,6 @@ static void ade_init(struct ade_hw_ctx *ctx) */ ade_update_bits(base + ADE_CTRL, FRM_END_START_OFST, FRM_END_START_MASK, REG_EFFECTIVE_IN_ADEEN_FRMEND); - ade_update_bits(base + LDI_INT_EN, UNDERFLOW_INT_EN_OFST, MASK(1), 1); } static bool ade_crtc_mode_fixup(struct drm_crtc *crtc, @@ -304,17 +302,6 @@ static void ade_crtc_disable_vblank(struct drm_crtc *crtc) MASK(1), 0); } -static void drm_underflow_wq(struct work_struct *work) -{ - struct ade_hw_ctx *ctx = container_of(work, struct ade_hw_ctx, - display_reset_wq); - struct drm_device *drm_dev = ctx->crtc->dev; - struct drm_atomic_state *state; - - state = drm_atomic_helper_suspend(drm_dev); - drm_atomic_helper_resume(drm_dev, state); -} - static irqreturn_t ade_irq_handler(int irq, void *data) { struct ade_hw_ctx *ctx = data; @@ -331,12 +318,6 @@ static irqreturn_t ade_irq_handler(int irq, void *data) MASK(1), 1); drm_crtc_handle_vblank(crtc); } - if (status & BIT(UNDERFLOW_INT_EN_OFST)) { - ade_update_bits(base + LDI_INT_CLR, UNDERFLOW_INT_EN_OFST, - MASK(1), 1); - DRM_ERROR("LDI underflow!"); - schedule_work(&ctx->display_reset_wq); - } return IRQ_HANDLED; } @@ -919,7 +900,6 @@ static void *ade_hw_ctx_alloc(struct platform_device *pdev, 
if (ret) return ERR_PTR(-EIO); - INIT_WORK(&ctx->display_reset_wq, drm_underflow_wq); ctx->crtc = crtc; return ctx; diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index ba9595960bbe..907c4471f591 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -75,9 +75,8 @@ config DRM_I915_CAPTURE_ERROR help This option enables capturing the GPU state when a hang is detected. This information is vital for triaging hangs and assists in debugging. - Please report any hang to - https://bugs.freedesktop.org/enter_bug.cgi?product=DRI - for triaging. + Please report any hang for triaging according to: + https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs If in doubt, say "Y". diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 2a27fb5d7dc6..1488822398fe 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4227,7 +4227,9 @@ static bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, struct intel_crtc_state *crtc_state) { - if (INTEL_GEN(dev_priv) >= 11 && crtc_state->port_clock > 594000) + if (IS_ELKHARTLAKE(dev_priv) && crtc_state->port_clock > 594000) + crtc_state->min_voltage_level = 3; + else if (INTEL_GEN(dev_priv) >= 11 && crtc_state->port_clock > 594000) crtc_state->min_voltage_level = 1; else if (IS_CANNONLAKE(dev_priv) && crtc_state->port_clock > 594000) crtc_state->min_voltage_level = 2; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 301897791627..b670239a293b 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10731,7 +10731,7 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) u32 base; if (INTEL_INFO(dev_priv)->display.cursor_needs_physical) - base = obj->phys_handle->busaddr; + base = sg_dma_address(obj->mm.pages->sgl); else base = intel_plane_ggtt_offset(plane_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 12ba74788cce..597e45977349 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -4471,13 +4471,19 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) static void icl_mbus_init(struct drm_i915_private *dev_priv) { - u32 val; + u32 mask, val; - val = MBUS_ABOX_BT_CREDIT_POOL1(16) | - MBUS_ABOX_BT_CREDIT_POOL2(16) | - MBUS_ABOX_B_CREDIT(1) | - MBUS_ABOX_BW_CREDIT(1); + mask = MBUS_ABOX_BT_CREDIT_POOL1_MASK | + MBUS_ABOX_BT_CREDIT_POOL2_MASK | + MBUS_ABOX_B_CREDIT_MASK | + MBUS_ABOX_BW_CREDIT_MASK; + val = I915_READ(MBUS_ABOX_CTL); + val &= ~mask; + val |= MBUS_ABOX_BT_CREDIT_POOL1(16) | + MBUS_ABOX_BT_CREDIT_POOL2(16) | + MBUS_ABOX_B_CREDIT(1) | + MBUS_ABOX_BW_CREDIT(1); I915_WRITE(MBUS_ABOX_CTL, val); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 42385277c684..f3d608df1c4d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -484,6 +484,22 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) return -ENODEV; + /* + * If the cancel fails, we then need to reset, cleanly! 
+ * + * If the per-engine reset fails, all hope is lost! We resort + * to a full GPU reset in that unlikely case, but realistically + * if the engine could not reset, the full reset does not fare + * much better. The damage has been done. + * + * However, if we cannot reset an engine by itself, we cannot + * clean up a hanging persistent context without causing + * collateral damage, and we should not pretend we can by + * exposing the interface. + */ + if (!intel_has_reset_engine(&ctx->i915->gt)) + return -ENODEV; + i915_gem_context_clear_persistence(ctx); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index bc3a67226163..768c4b607dd2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -441,7 +441,8 @@ eb_validate_vma(struct i915_execbuffer *eb, if (unlikely(entry->flags & eb->invalid_flags)) return -EINVAL; - if (unlikely(entry->alignment && !is_power_of_2(entry->alignment))) + if (unlikely(entry->alignment && + !is_power_of_2_u64(entry->alignment))) return -EINVAL; /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index a596548c07bf..b6937469ffd3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -174,7 +174,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(vma->obj != obj); spin_unlock(&obj->vma.lock); - i915_vma_destroy(vma); + __i915_vma_put(vma); spin_lock(&obj->vma.lock); } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index e3f3944fbd90..1078a76d6d84 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -260,9 +260,6 @@ struct drm_i915_gem_object { void *gvt_info; }; - - /** for phys allocated objects */ - struct drm_dma_handle *phys_handle; }; static inline struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 8043ff63d73f..5e2e0109c9ba 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -22,88 +22,87 @@ static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; - struct drm_dma_handle *phys; - struct sg_table *st; struct scatterlist *sg; - char *vaddr; + struct sg_table *st; + dma_addr_t dma; + void *vaddr; + void *dst; int i; - int err; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) return -EINVAL; - /* Always aligning to the object size, allows a single allocation + /* + * Always aligning to the object size allows a single allocation * to handle all possible callers, and given typical object sizes, * the alignment of the buddy allocation will naturally match. 
*/ - phys = drm_pci_alloc(obj->base.dev, - roundup_pow_of_two(obj->base.size), - roundup_pow_of_two(obj->base.size)); - if (!phys) + vaddr = dma_alloc_coherent(&obj->base.dev->pdev->dev, + roundup_pow_of_two(obj->base.size), + &dma, GFP_KERNEL); + if (!vaddr) return -ENOMEM; - vaddr = phys->vaddr; + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto err_pci; + + if (sg_alloc_table(st, 1, GFP_KERNEL)) + goto err_st; + + sg = st->sgl; + sg->offset = 0; + sg->length = obj->base.size; + + sg_assign_page(sg, (struct page *)vaddr); + sg_dma_address(sg) = dma; + sg_dma_len(sg) = obj->base.size; + + dst = vaddr; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; - char *src; + void *src; page = shmem_read_mapping_page(mapping, i); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto err_phys; - } + if (IS_ERR(page)) + goto err_st; src = kmap_atomic(page); - memcpy(vaddr, src, PAGE_SIZE); - drm_clflush_virt_range(vaddr, PAGE_SIZE); + memcpy(dst, src, PAGE_SIZE); + drm_clflush_virt_range(dst, PAGE_SIZE); kunmap_atomic(src); put_page(page); - vaddr += PAGE_SIZE; + dst += PAGE_SIZE; } intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) { - err = -ENOMEM; - goto err_phys; - } - - if (sg_alloc_table(st, 1, GFP_KERNEL)) { - kfree(st); - err = -ENOMEM; - goto err_phys; - } - - sg = st->sgl; - sg->offset = 0; - sg->length = obj->base.size; - - sg_dma_address(sg) = phys->busaddr; - sg_dma_len(sg) = obj->base.size; - - obj->phys_handle = phys; - __i915_gem_object_set_pages(obj, st, sg->length); return 0; -err_phys: - drm_pci_free(obj->base.dev, phys); - - return err; +err_st: + kfree(st); +err_pci: + dma_free_coherent(&obj->base.dev->pdev->dev, + roundup_pow_of_two(obj->base.size), + vaddr, dma); + return -ENOMEM; } static void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, struct sg_table *pages) { + dma_addr_t dma = sg_dma_address(pages->sgl); + void *vaddr = sg_page(pages->sgl); + __i915_gem_object_release_shmem(obj, pages, false); if (obj->mm.dirty) { struct address_space *mapping = obj->base.filp->f_mapping; - char *vaddr = obj->phys_handle->vaddr; + void *src = vaddr; int i; for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { @@ -115,15 +114,16 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, continue; dst = kmap_atomic(page); - drm_clflush_virt_range(vaddr, PAGE_SIZE); - memcpy(dst, vaddr, PAGE_SIZE); + drm_clflush_virt_range(src, PAGE_SIZE); + memcpy(dst, src, PAGE_SIZE); kunmap_atomic(dst); set_page_dirty(page); if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); put_page(page); - vaddr += PAGE_SIZE; + + src += PAGE_SIZE; } obj->mm.dirty = false; } @@ -131,7 +131,9 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, sg_free_table(pages); kfree(pages); - drm_pci_free(obj->base.dev, obj->phys_handle); + dma_free_coherent(&obj->base.dev->pdev->dev, + roundup_pow_of_two(obj->base.size), + vaddr, dma); } static void phys_release(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index f2418a1cfe68..b5875063b97c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -257,8 +257,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *i915) with_intel_runtime_pm(&i915->runtime_pm, wakeref) { freed = i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_BOUND | - I915_SHRINK_UNBOUND | - I915_SHRINK_ACTIVE); + 
I915_SHRINK_UNBOUND); } return freed; @@ -337,7 +336,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) freed_pages = 0; with_intel_runtime_pm(&i915->runtime_pm, wakeref) freed_pages += i915_gem_shrink(i915, -1UL, NULL, - I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_WRITEBACK); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 688c49a24f32..bd1e2c12de63 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1110,8 +1110,7 @@ static int __igt_write_huge(struct intel_context *ce, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_destroy(vma); - + __i915_vma_put(vma); return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 29b2077b73d2..3d8f00d40d42 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -161,7 +161,7 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, kunmap(p); out: - i915_vma_destroy(vma); + __i915_vma_put(vma); return err; } @@ -255,7 +255,7 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, if (err) return err; - i915_vma_destroy(vma); + __i915_vma_put(vma); if (igt_timeout(end_time, "%s: timed out after tiling=%d stride=%d\n", @@ -567,7 +567,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, obj = i915_gem_object_create_internal(i915, size); if (IS_ERR(obj)) - return PTR_ERR(obj); + return false; err = create_mmap_offset(obj); i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d925a1035c9d..b2fba630d784 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1157,7 +1157,7 @@ static u64 execlists_update_context(struct i915_request *rq) { struct intel_context *ce = rq->hw_context; u64 desc = ce->lrc_desc; - u32 tail; + u32 tail, prev; /* * WaIdleLiteRestore:bdw,skl @@ -1170,9 +1170,15 @@ static u64 execlists_update_context(struct i915_request *rq) * subsequent resubmissions (for lite restore). Should that fail us, * and we try and submit the same tail again, force the context * reload. + * + * If we need to return to a preempted context, we need to skip the + * lite-restore and force it to reload the RING_TAIL. Otherwise, the + * HW has a tendency to ignore us rewinding the TAIL to the end of + * an earlier request. 
*/ tail = intel_ring_set_tail(rq->ring, rq->tail); - if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail)) + prev = ce->lrc_reg_state[CTX_RING_TAIL]; + if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) desc |= CTX_DESC_FORCE_RESTORE; ce->lrc_reg_state[CTX_RING_TAIL] = tail; rq->tail = rq->wa_tail; @@ -1416,16 +1422,10 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_unlock(&old->breadcrumbs.irq_lock); } -static struct i915_request * -last_active(const struct intel_engine_execlists *execlists) -{ - struct i915_request * const *last = READ_ONCE(execlists->active); - - while (*last && i915_request_completed(*last)) - last++; - - return *last; -} +#define for_each_waiter(p__, rq__) \ + list_for_each_entry_lockless(p__, \ + &(rq__)->sched.waiters_list, \ + wait_link) static void defer_request(struct i915_request *rq, struct list_head * const pl) { @@ -1444,7 +1444,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) GEM_BUG_ON(i915_request_is_active(rq)); list_move_tail(&rq->sched.link, pl); - list_for_each_entry(p, &rq->sched.waiters_list, wait_link) { + for_each_waiter(p, rq) { struct i915_request *w = container_of(p->waiter, typeof(*w), sched); @@ -1490,11 +1490,9 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq) if (!intel_engine_has_timeslices(engine)) return false; - if (list_is_last(&rq->sched.link, &engine->active.requests)) - return false; - - hint = max(rq_prio(list_next_entry(rq, sched.link)), - engine->execlists.queue_priority_hint); + hint = engine->execlists.queue_priority_hint; + if (!list_is_last(&rq->sched.link, &engine->active.requests)) + hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); return hint >= effective_prio(rq); } @@ -1536,16 +1534,26 @@ static void set_timeslice(struct intel_engine_cs *engine) set_timer_ms(&engine->execlists.timer, active_timeslice(engine)); } +static void start_timeslice(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + + execlists->switch_priority_hint = execlists->queue_priority_hint; + + if (timer_pending(&execlists->timer)) + return; + + set_timer_ms(&execlists->timer, timeslice(engine)); +} + static void record_preemption(struct intel_engine_execlists *execlists) { (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } -static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) { - struct i915_request *rq; - - rq = last_active(&engine->execlists); if (!rq) return 0; @@ -1556,13 +1564,14 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) return READ_ONCE(engine->props.preempt_timeout_ms); } -static void set_preempt_timeout(struct intel_engine_cs *engine) +static void set_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) { if (!intel_engine_has_preempt_reset(engine)) return; set_timer_ms(&engine->execlists.preempt, - active_preempt_timeout(engine)); + active_preempt_timeout(engine, rq)); } static void execlists_dequeue(struct intel_engine_cs *engine) @@ -1570,6 +1579,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request * const *active; struct i915_request *last; struct 
rb_node *rb; bool submit = false; @@ -1624,7 +1634,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * i.e. we will retrigger preemption following the ack in case * of trouble. */ - last = last_active(execlists); + active = READ_ONCE(execlists->active); + while ((last = *active) && i915_request_completed(last)) + active++; + if (last) { if (need_preempt(engine, last, rb)) { GEM_TRACE("%s: preempting last=%llx:%lld, prio=%d, hint=%d\n", @@ -1651,14 +1664,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ __unwind_incomplete_requests(engine); - /* - * If we need to return to the preempted context, we - * need to skip the lite-restore and force it to - * reload the RING_TAIL. Otherwise, the HW has a - * tendency to ignore us rewinding the TAIL to the - * end of an earlier request. - */ - last->hw_context->lrc_desc |= CTX_DESC_FORCE_RESTORE; last = NULL; } else if (need_timeslice(engine, last) && timer_expired(&engine->execlists.timer)) { @@ -1702,11 +1707,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. */ - if (!execlists->timer.expires && - need_timeslice(engine, last)) - set_timer_ms(&execlists->timer, - timeslice(engine)); - + start_timeslice(engine); return; } } @@ -1741,7 +1742,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.active.lock); - return; /* leave this for another */ + start_timeslice(engine); + return; /* leave this for another sibling */ } GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n", @@ -1920,7 +1922,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * Skip if we ended up with exactly the same set of requests, * e.g. 
trying to timeslice a pair of ordered contexts */ - if (!memcmp(execlists->active, execlists->pending, + if (!memcmp(active, execlists->pending, (port - execlists->pending + 1) * sizeof(*port))) { do execlists_schedule_out(fetch_and_zero(port)); @@ -1932,7 +1934,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) memset(port + 1, 0, (last_port - port) * sizeof(*port)); execlists_submit_ports(engine); - set_preempt_timeout(engine); + set_preempt_timeout(engine, *active); } else { skip_submit: ring_set_paused(engine, 0); @@ -3493,26 +3495,6 @@ static int gen12_emit_flush_render(struct i915_request *request, *cs++ = preparser_disable(false); intel_ring_advance(request, cs); - - /* - * Wa_1604544889:tgl - */ - if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { - flags = 0; - flags |= PIPE_CONTROL_CS_STALL; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; - - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; - - cs = intel_ring_begin(request, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cs = gen8_emit_pipe_control(cs, flags, - LRC_PPHWSP_SCRATCH_ADDR); - intel_ring_advance(request, cs); - } } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0..6ff803f397c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -145,6 +145,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) kref_init(&ring->ref); ring->size = size; + ring->wrap = BITS_PER_TYPE(ring->size) - ilog2(size); /* * Workaround an erratum on the i830 which causes a hang if diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index ea2839d9e044..5bdce24994aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -56,6 +56,14 @@ static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos) return pos & (ring->size - 1); } +static inline int intel_ring_direction(const struct intel_ring *ring, + u32 next, u32 prev) +{ + typecheck(typeof(ring->size), next); + typecheck(typeof(ring->size), prev); + return (next - prev) << ring->wrap; +} + static inline bool intel_ring_offset_valid(const struct intel_ring *ring, unsigned int pos) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h index d9f17f38e0cc..3cd7fec7fd8d 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_types.h +++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h @@ -45,6 +45,7 @@ struct intel_ring { u32 space; u32 size; + u32 wrap; u32 effective_size; }; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 649798c184fb..9321b9328bd2 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -197,11 +197,15 @@ static void cacheline_release(struct intel_timeline_cacheline *cl) static void cacheline_free(struct intel_timeline_cacheline *cl) { + if (!i915_active_acquire_if_busy(&cl->active)) { + __idle_cacheline_free(cl); + return; + } + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); - if (i915_active_is_idle(&cl->active)) - __idle_cacheline_free(cl); + i915_active_release(&cl->active); } int intel_timeline_init(struct intel_timeline *timeline, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e4bccc14602f..8cafdee7d43d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1504,15 +1504,34 @@ create_scratch(struct i915_address_space *vm, int count) return ERR_PTR(err); } +static const struct { + u32 start; + u32 end; +} mcr_ranges_gen8[] = { + { .start = 0x5500, .end = 0x55ff }, + { .start = 0x7000, .end = 0x7fff }, + { .start = 0x9400, .end = 0x97ff }, + { .start = 0xb000, .end = 0xb3ff }, + { .start = 0xe000, .end = 0xe7ff }, + {}, +}; + static bool mcr_range(struct drm_i915_private *i915, u32 offset) { + int i; + + if (INTEL_GEN(i915) < 8) + return false; + /* - * Registers in this range are affected by the MCR selector + * Registers in these ranges are affected by the MCR selector * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. */ - if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) - return true; + for (i = 0; mcr_ranges_gen8[i].start; i++) + if (offset >= mcr_ranges_gen8[i].start && + offset <= mcr_ranges_gen8[i].end) + return true; return false; } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 83f549d203a0..a635cf832d69 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -59,11 +59,26 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); atomic_set(&ring->pin_count, 1); + ring->vma = i915_vma_alloc(); + if (!ring->vma) { + kfree(ring); + return NULL; + } + i915_active_init(&ring->vma->active, NULL, NULL); + intel_ring_update_space(ring); return ring; } +static void mock_ring_free(struct intel_ring *ring) +{ + i915_active_fini(&ring->vma->active); + i915_vma_free(ring->vma); + + kfree(ring); +} + static struct i915_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -121,7 +136,7 @@ static void mock_context_destroy(struct kref *ref) GEM_BUG_ON(intel_context_is_pinned(ce)); if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) { - kfree(ce->ring); + mock_ring_free(ce->ring); mock_timeline_unpin(ce->timeline); } diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index e1c313da6c00..a62bdf9be682 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -457,7 +457,8 @@ void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected) struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; /* TODO: add more platforms support */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) { if (connected) { vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index 2477a1e5a166..ae139f0877ae 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -151,12 +151,12 @@ static void dmabuf_gem_object_free(struct kref *kref) dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, list); if (dmabuf_obj == obj) { + list_del(pos); intel_gvt_hypervisor_put_vfio_device(vgpu); idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); kfree(dmabuf_obj->info); kfree(dmabuf_obj); - list_del(pos); break; } } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 4b04af569c05..7dc7bb850d0a 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1956,7 +1956,11 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) if 
(mm->type == INTEL_GVT_MM_PPGTT) { list_del(&mm->ppgtt_mm.list); + + mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); list_del(&mm->ppgtt_mm.lru_list); + mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); + invalidate_ppgtt_mm(mm); } else { vfree(mm->ggtt_mm.virtual_ggtt); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 85bd9bf4f6ee..345c2aa3b491 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -272,10 +272,17 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) { struct intel_gvt *gvt = vgpu->gvt; - mutex_lock(&vgpu->vgpu_lock); - WARN(vgpu->active, "vGPU is still active!\n"); + /* + * Remove the vgpu from the idr first, so the later cleanup can + * judge whether the service needs to stop once no vgpu is active. + */ + mutex_lock(&gvt->lock); + idr_remove(&gvt->vgpu_idr, vgpu->id); + mutex_unlock(&gvt->lock); + + mutex_lock(&vgpu->vgpu_lock); intel_gvt_debugfs_remove_vgpu(vgpu); intel_vgpu_clean_sched_policy(vgpu); intel_vgpu_clean_submission(vgpu); @@ -290,7 +297,6 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) mutex_unlock(&vgpu->vgpu_lock); mutex_lock(&gvt->lock); - idr_remove(&gvt->vgpu_idr, vgpu->id); if (idr_is_empty(&gvt->vgpu_idr)) intel_gvt_clean_irq(gvt); intel_gvt_update_vgpu_types(gvt); @@ -560,9 +566,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index a19e7d89bc8a..378b52d1ab74 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -91,10 +91,9 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { - spin_lock_irq(&ref->tree_lock); + lockdep_assert_held(&ref->tree_lock); if (!atomic_read(&ref->count)) /* before the first inc */ debug_object_activate(ref, &active_debug_desc); - spin_unlock_irq(&ref->tree_lock); } static void debug_active_deactivate(struct i915_active *ref) @@ -407,8 +406,10 @@ int i915_active_acquire(struct i915_active *ref) if (!atomic_read(&ref->count) && ref->active) err = ref->active(ref); if (!err) { + spin_lock_irq(&ref->tree_lock); /* vs __active_retire() */ debug_active_activate(ref); atomic_inc(&ref->count); + spin_unlock_irq(&ref->tree_lock); } mutex_unlock(&ref->mutex); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 905890e3ac24..0475a0343487 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -119,33 +119,65 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags) { - struct i915_vma *vma; + struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm; LIST_HEAD(still_in_list); - int ret = 0; + intel_wakeref_t wakeref; + struct i915_vma *vma; + int ret; + + if (!atomic_read(&obj->bind_count)) + return 0; + /* + * As some machines use ACPI to handle runtime-resume callbacks, and + * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex + * as they are required by the shrinker. Ergo, we wake the device up + * first just in case. 
+ */ + wakeref = intel_runtime_pm_get(rpm); + +try_again: + ret = 0; spin_lock(&obj->vma.lock); while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, struct i915_vma, obj_link))) { struct i915_address_space *vm = vma->vm; - ret = -EBUSY; + list_move_tail(&vma->obj_link, &still_in_list); + if (!i915_vma_is_bound(vma, I915_VMA_BIND_MASK)) + continue; + + ret = -EAGAIN; if (!i915_vm_tryopen(vm)) break; - list_move_tail(&vma->obj_link, &still_in_list); + /* Prevent vma being freed by i915_vma_parked as we unbind */ + vma = __i915_vma_get(vma); spin_unlock(&obj->vma.lock); - if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || - !i915_vma_is_active(vma)) - ret = i915_vma_unbind(vma); + if (vma) { + ret = -EBUSY; + if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma)) + ret = i915_vma_unbind(vma); + + __i915_vma_put(vma); + } i915_vm_close(vm); spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma.list); + list_splice_init(&still_in_list, &obj->vma.list); spin_unlock(&obj->vma.lock); + if (ret == -EAGAIN && flags & I915_GEM_OBJECT_UNBIND_ACTIVE) { + rcu_barrier(); /* flush the i915_vm_release() */ + goto try_again; + } + + intel_runtime_pm_put(rpm, wakeref); + return ret; } @@ -154,7 +186,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - void *vaddr = obj->phys_handle->vaddr + args->offset; + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); /* @@ -800,10 +832,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_fast(obj, args); if (ret == -EFAULT || ret == -ENOSPC) { - if (obj->phys_handle) - ret = i915_gem_phys_pwrite(obj, args, file); - else + if (i915_gem_object_has_struct_page(obj)) ret = i915_gem_shmem_pwrite(obj, args); + else + ret = i915_gem_phys_pwrite(obj, args, file); } i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 44727806dfd7..dd2c20f7d4d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -522,7 +522,7 @@ void __i915_vm_close(struct i915_address_space *vm) atomic_and(~I915_VMA_PIN_MASK, &vma->flags); WARN_ON(__i915_vma_unbind(vma)); - i915_vma_destroy(vma); + __i915_vma_put(vma); i915_gem_object_put(obj); } @@ -1790,7 +1790,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - i915_vma_destroy(ppgtt->vma); + __i915_vma_put(ppgtt->vma); gen6_ppgtt_free_pd(ppgtt); free_scratch(vm); @@ -1878,6 +1878,7 @@ static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size) i915_active_init(&vma->active, NULL, NULL); + kref_init(&vma->ref); mutex_init(&vma->pages_mutex); vma->vm = i915_vm_get(&ggtt->vm); vma->ops = &pd_vma_ops; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3c85cb0ee99f..354845800085 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1820,7 +1820,8 @@ void i915_capture_error_state(struct drm_i915_private *i915, if (!xchg(&warned, true) && ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); - pr_info("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); + pr_info("Please file a _new_ bug report at 
https://gitlab.freedesktop.org/drm/intel/issues/new.\n"); + pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n"); pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n"); pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n"); pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n", diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2ae14bc14931..cc917200bdeb 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1950,9 +1950,10 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) return i915_vma_get(oa_bo->vma); } -static int emit_oa_config(struct i915_perf_stream *stream, - struct i915_oa_config *oa_config, - struct intel_context *ce) +static struct i915_request * +emit_oa_config(struct i915_perf_stream *stream, + struct i915_oa_config *oa_config, + struct intel_context *ce) { struct i915_request *rq; struct i915_vma *vma; @@ -1960,7 +1961,7 @@ static int emit_oa_config(struct i915_perf_stream *stream, vma = get_oa_vma(stream, oa_config); if (IS_ERR(vma)) - return PTR_ERR(vma); + return ERR_CAST(vma); err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) @@ -1983,13 +1984,17 @@ static int emit_oa_config(struct i915_perf_stream *stream, err = rq->engine->emit_bb_start(rq, vma->node.start, 0, I915_DISPATCH_SECURE); + if (err) + goto err_add_request; + + i915_request_get(rq); err_add_request: i915_request_add(rq); err_vma_unpin: i915_vma_unpin(vma); err_vma_put: i915_vma_put(vma); - return err; + return err ? ERR_PTR(err) : rq; } static struct intel_context *oa_context(struct i915_perf_stream *stream) @@ -1997,7 +2002,8 @@ static struct intel_context *oa_context(struct i915_perf_stream *stream) return stream->pinned_ctx ?: stream->engine->kernel_context; } -static int hsw_enable_metric_set(struct i915_perf_stream *stream) +static struct i915_request * +hsw_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -2408,7 +2414,8 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs)); } -static int gen8_enable_metric_set(struct i915_perf_stream *stream) +static struct i915_request * +gen8_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2450,12 +2457,13 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) */ ret = lrc_configure_all_contexts(stream, oa_config); if (ret) - return ret; + return ERR_PTR(ret); return emit_oa_config(stream, oa_config, oa_context(stream)); } -static int gen12_enable_metric_set(struct i915_perf_stream *stream) +static struct i915_request * +gen12_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; @@ -2488,7 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) */ ret = gen12_configure_all_contexts(stream, oa_config); if (ret) - return ret; + return ERR_PTR(ret); /* * For Gen12, performance counters are context @@ -2498,7 +2506,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream) if (stream->ctx) { ret = gen12_configure_oar_context(stream, true); if (ret) - return ret; + return ERR_PTR(ret); } return emit_oa_config(stream, 
oa_config, oa_context(stream)); @@ -2693,6 +2701,20 @@ static const struct i915_perf_stream_ops i915_oa_stream_ops = { .read = i915_oa_read, }; +static int i915_perf_stream_enable_sync(struct i915_perf_stream *stream) +{ + struct i915_request *rq; + + rq = stream->perf->ops.enable_metric_set(stream); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + + return 0; +} + /** * i915_oa_stream_init - validate combined props for OA stream and init * @stream: An i915 perf stream @@ -2826,7 +2848,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, stream->ops = &i915_oa_stream_ops; perf->exclusive_stream = stream; - ret = perf->ops.enable_metric_set(stream); + ret = i915_perf_stream_enable_sync(stream); if (ret) { DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; @@ -3144,7 +3166,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, return -EINVAL; if (config != stream->oa_config) { - int err; + struct i915_request *rq; /* * If OA is bound to a specific context, emit the @@ -3155,11 +3177,13 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. */ - err = emit_oa_config(stream, config, oa_context(stream)); - if (err == 0) + rq = emit_oa_config(stream, config, oa_context(stream)); + if (!IS_ERR(rq)) { config = xchg(&stream->oa_config, config); - else - ret = err; + i915_request_put(rq); + } else { + ret = PTR_ERR(rq); + } } i915_oa_config_put(config); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..171f95a97ef6 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -339,7 +339,8 @@ struct i915_oa_ops { * counter reports being sampled. May apply system constraints such as * disabling EU clock gating as required. */ - int (*enable_metric_set)(struct i915_perf_stream *stream); + struct i915_request * + (*enable_metric_set)(struct i915_perf_stream *stream); /** * @disable_metric_set: Remove system constraints associated with using diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d6d2e6fb8674..40036eff709c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -594,8 +594,10 @@ static void i915_pmu_enable(struct perf_event *event) container_of(event->pmu, typeof(*i915), pmu.base); unsigned int bit = event_enabled_bit(event); struct i915_pmu *pmu = &i915->pmu; + intel_wakeref_t wakeref; unsigned long flags; + wakeref = intel_runtime_pm_get(&i915->runtime_pm); spin_lock_irqsave(&pmu->lock, flags); /* @@ -605,6 +607,14 @@ static void i915_pmu_enable(struct perf_event *event) BUILD_BUG_ON(ARRAY_SIZE(pmu->enable_count) != I915_PMU_MASK_BITS); GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); GEM_BUG_ON(pmu->enable_count[bit] == ~0); + + if (pmu->enable_count[bit] == 0 && + config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sleep_last = ktime_get(); + } + pmu->enable |= BIT_ULL(bit); pmu->enable_count[bit]++; @@ -645,6 +655,8 @@ static void i915_pmu_enable(struct perf_event *event) * an existing non-zero value. 
*/ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 765bec89fc0d..50effee6b845 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -529,19 +529,31 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } +static void irq_semaphore_cb(struct irq_work *wrk) +{ + struct i915_request *rq = + container_of(wrk, typeof(*rq), semaphore_work); + + i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); + i915_request_put(rq); +} + static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - struct i915_request *request = - container_of(fence, typeof(*request), semaphore); + struct i915_request *rq = container_of(fence, typeof(*rq), semaphore); switch (state) { case FENCE_COMPLETE: - i915_schedule_bump_priority(request, I915_PRIORITY_NOSEMAPHORE); + if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { + i915_request_get(rq); + init_irq_work(&rq->semaphore_work, irq_semaphore_cb); + irq_work_queue(&rq->semaphore_work); + } break; case FENCE_FREE: - i915_request_put(request); + i915_request_put(rq); break; } @@ -759,8 +771,8 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) struct dma_fence *fence; int err; - GEM_BUG_ON(i915_request_timeline(rq) == - rcu_access_pointer(signal->timeline)); + if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)) + return 0; rcu_read_lock(); tl = rcu_dereference(signal->timeline); @@ -785,7 +797,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) return PTR_ERR_OR_ZERO(fence); err = 0; - if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) + if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) err = i915_sw_fence_await_dma_fence(&rq->submit, fence, 0, I915_FENCE_GFP); @@ -1283,9 +1295,9 @@ void __i915_request_queue(struct i915_request *rq, * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - i915_sw_fence_commit(&rq->semaphore); if (attr && rq->engine->schedule) rq->engine->schedule(rq, attr); + i915_sw_fence_commit(&rq->semaphore); i915_sw_fence_commit(&rq->submit); } diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 96991d64759c..4e6d543c0104 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -26,6 +26,7 @@ #define I915_REQUEST_H #include +#include #include #include "gt/intel_context_types.h" @@ -147,6 +148,7 @@ struct i915_request { }; struct list_head execute_cb; struct i915_sw_fence semaphore; + struct irq_work semaphore_work; /* * A list of everyone we wait upon, and everyone who waits upon us. 
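The i915_request.c hunk above is the core of the fix: the semaphore fence callback may fire from a context where taking the scheduler lock is unsafe, so the NOSEMAPHORE priority bump is handed off to an irq_work, with an extra request reference held until the deferred callback has run. Below is a minimal sketch of that deferral pattern, assuming hypothetical demo_* names; only the irq_work and kref interfaces are the real kernel APIs used by the hunk.

    #include <linux/irq_work.h>
    #include <linux/kernel.h>
    #include <linux/kref.h>
    #include <linux/slab.h>

    struct demo_request {
            struct kref ref;
            struct irq_work work;
            int priority;
    };

    static void demo_release(struct kref *kref)
    {
            kfree(container_of(kref, struct demo_request, ref));
    }

    /* Deferred step: runs shortly afterwards (typically in IRQ context),
     * where the scheduler lock can be taken without recursing on it. */
    static void demo_bump(struct irq_work *wrk)
    {
            struct demo_request *rq = container_of(wrk, typeof(*rq), work);

            rq->priority++;                   /* the deferred priority bump */
            kref_put(&rq->ref, demo_release); /* drop the pinning reference */
    }

    /* Fence-callback side: too constrained to do the bump directly. */
    static void demo_fence_complete(struct demo_request *rq)
    {
            kref_get(&rq->ref); /* keep rq alive until demo_bump() runs */
            init_irq_work(&rq->work, demo_bump);
            irq_work_queue(&rq->work);
    }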
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 247a9671bca5..e954fa6109c5 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -415,8 +415,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, if (!node_signaled(signal)) { INIT_LIST_HEAD(&dep->dfs_link); - list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->waiter = node; dep->flags = flags; @@ -426,6 +424,10 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, !node_started(signal)) node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; + /* All set, now publish. Beware the lockless walkers. */ + list_add(&dep->signal_link, &node->signalers_list); + list_add_rcu(&dep->wait_link, &signal->waiters_list); + /* * As we do not allow WAIT to preempt inflight requests, * once we have executed a request, along with triggering diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 0348c6d0ef5f..412135a07d5d 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -8,9 +8,8 @@ #include "i915_drv.h" #include "i915_utils.h" -#define FDO_BUG_URL "https://bugs.freedesktop.org/enter_bug.cgi?product=DRI" -#define FDO_BUG_MSG "Please file a bug at " FDO_BUG_URL " against DRM/Intel " \ "providing the dmesg log by booting with drm.debug=0xf" +#define FDO_BUG_URL "https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs" +#define FDO_BUG_MSG "Please file a bug on drm/i915; see " FDO_BUG_URL " for details." void __i915_printk(struct drm_i915_private *dev_priv, const char *level, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 04139ba1191e..87ec24996bfb 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -234,6 +234,11 @@ static inline u64 ptr_to_u64(const void *ptr) __idx; \ }) +static inline bool is_power_of_2_u64(u64 n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 01c822256b39..00973017abba 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -112,6 +112,7 @@ vma_create(struct drm_i915_gem_object *obj, if (vma == NULL) return ERR_PTR(-ENOMEM); + kref_init(&vma->ref); mutex_init(&vma->pages_mutex); vma->vm = i915_vm_get(vm); vma->ops = &vm->vma_ops; @@ -978,8 +979,10 @@ void i915_vma_reopen(struct i915_vma *vma) __i915_vma_remove_closed(vma); } -void i915_vma_destroy(struct i915_vma *vma) +void i915_vma_release(struct kref *ref) { + struct i915_vma *vma = container_of(ref, typeof(*vma), ref); + if (drm_mm_node_allocated(&vma->node)) { mutex_lock(&vma->vm->mutex); atomic_and(~I915_VMA_PIN_MASK, &vma->flags); @@ -1027,7 +1030,7 @@ void i915_vma_parked(struct intel_gt *gt) spin_unlock_irq(&gt->closed_lock); if (obj) { - i915_vma_destroy(vma); + __i915_vma_put(vma); i915_gem_object_put(obj); } @@ -1192,7 +1195,7 @@ int __i915_vma_unbind(struct i915_vma *vma) i915_vma_detach(vma); vma_unbind_pages(vma); - drm_mm_remove_node(&vma->node); /* pairs with i915_vma_destroy() */ + drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 465932813bc5..ce1db908ad69 100644 --- 
a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -51,14 +51,19 @@ enum i915_cache_level; */ struct i915_vma { struct drm_mm_node node; - struct drm_i915_gem_object *obj; + struct i915_address_space *vm; const struct i915_vma_ops *ops; - struct i915_fence_reg *fence; + + struct drm_i915_gem_object *obj; struct dma_resv *resv; /** Alias of obj->resv */ + struct sg_table *pages; void __iomem *iomap; void *private; /* owned by creator */ + + struct i915_fence_reg *fence; + u64 size; u64 display_alignment; struct i915_page_sizes page_sizes; @@ -71,6 +76,7 @@ struct i915_vma { * handles (but same file) for execbuf, i.e. the number of aliases * that exist in the ctx->handle_vmas LUT for this vma. */ + struct kref ref; atomic_t open_count; atomic_t flags; /** @@ -333,7 +339,20 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); -void i915_vma_destroy(struct i915_vma *vma); + +static inline struct i915_vma *__i915_vma_get(struct i915_vma *vma) +{ + if (kref_get_unless_zero(&vma->ref)) + return vma; + + return NULL; +} + +void i915_vma_release(struct kref *ref); +static inline void __i915_vma_put(struct i915_vma *vma) +{ + kref_put(&vma->ref, i915_vma_release); +} #define assert_vma_held(vma) dma_resv_assert_held((vma)->resv) diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.c b/drivers/gpu/drm/ingenic/ingenic-drm.c index ec32e1c67335..43a015f33e97 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm.c @@ -372,14 +372,18 @@ static void ingenic_drm_plane_atomic_update(struct drm_plane *plane, struct ingenic_drm *priv = drm_plane_get_priv(plane); struct drm_plane_state *state = plane->state; unsigned int width, height, cpp; + dma_addr_t addr; - width = state->crtc->state->adjusted_mode.hdisplay; - height = state->crtc->state->adjusted_mode.vdisplay; - cpp = state->fb->format->cpp[plane->index]; + if (state && state->fb) { + addr = drm_fb_cma_get_gem_addr(state->fb, state, 0); + width = state->crtc->state->adjusted_mode.hdisplay; + height = state->crtc->state->adjusted_mode.vdisplay; + cpp = state->fb->format->cpp[plane->index]; - priv->dma_hwdesc->addr = drm_fb_cma_get_gem_addr(state->fb, state, 0); - priv->dma_hwdesc->cmd = width * height * cpp / 4; - priv->dma_hwdesc->cmd |= JZ_LCD_CMD_EOF_IRQ; + priv->dma_hwdesc->addr = addr; + priv->dma_hwdesc->cmd = width * height * cpp / 4; + priv->dma_hwdesc->cmd |= JZ_LCD_CMD_EOF_IRQ; + } } static void ingenic_drm_encoder_atomic_mode_set(struct drm_encoder *encoder, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 3305a94fc930..1d46fbe9e07c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -328,6 +328,7 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc) static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) { struct drm_device *drm = mtk_crtc->base.dev; + struct drm_crtc *crtc = &mtk_crtc->base; int i; DRM_DEBUG_DRIVER("%s\n", __func__); @@ -353,6 +354,13 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc) mtk_disp_mutex_unprepare(mtk_crtc->mutex); pm_runtime_put(drm->dev); + + if (crtc->state->event && !crtc->state->active) { + spin_lock_irq(&crtc->dev->event_lock); + drm_crtc_send_vblank_event(crtc, crtc->state->event); + crtc->state->event = NULL; + spin_unlock_irq(&crtc->dev->event_lock); + } } static void 
mtk_crtc_ddp_config(struct drm_crtc *crtc) @@ -541,10 +549,18 @@ static const struct drm_crtc_helper_funcs mtk_crtc_helper_funcs = { static int mtk_drm_crtc_init(struct drm_device *drm, struct mtk_drm_crtc *mtk_crtc, - struct drm_plane *primary, - struct drm_plane *cursor, unsigned int pipe) + unsigned int pipe) { - int ret; + struct drm_plane *primary = NULL; + struct drm_plane *cursor = NULL; + int i, ret; + + for (i = 0; i < mtk_crtc->layer_nr; i++) { + if (mtk_crtc->planes[i].type == DRM_PLANE_TYPE_PRIMARY) + primary = &mtk_crtc->planes[i]; + else if (mtk_crtc->planes[i].type == DRM_PLANE_TYPE_CURSOR) + cursor = &mtk_crtc->planes[i]; + } ret = drm_crtc_init_with_planes(drm, &mtk_crtc->base, primary, cursor, &mtk_crtc_funcs, NULL); @@ -590,11 +606,12 @@ static int mtk_drm_crtc_num_comp_planes(struct mtk_drm_crtc *mtk_crtc, } static inline -enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx) +enum drm_plane_type mtk_drm_crtc_plane_type(unsigned int plane_idx, + unsigned int num_planes) { if (plane_idx == 0) return DRM_PLANE_TYPE_PRIMARY; - else if (plane_idx == 1) + else if (plane_idx == (num_planes - 1)) return DRM_PLANE_TYPE_CURSOR; else return DRM_PLANE_TYPE_OVERLAY; @@ -613,7 +630,8 @@ static int mtk_drm_crtc_init_comp_planes(struct drm_device *drm_dev, ret = mtk_plane_init(drm_dev, &mtk_crtc->planes[mtk_crtc->layer_nr], BIT(pipe), - mtk_drm_crtc_plane_type(mtk_crtc->layer_nr), + mtk_drm_crtc_plane_type(mtk_crtc->layer_nr, + num_planes), mtk_ddp_comp_supported_rotations(comp)); if (ret) return ret; @@ -633,6 +651,7 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, int pipe = priv->num_pipes; int ret; int i; + uint gamma_lut_size = 0; if (!path) return 0; @@ -683,6 +702,9 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, } mtk_crtc->ddp_comp[i] = comp; + + if (comp->funcs && comp->funcs->gamma_set) + gamma_lut_size = MTK_LUT_SIZE; } for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) @@ -698,13 +720,13 @@ int mtk_drm_crtc_create(struct drm_device *drm_dev, return ret; } - ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, &mtk_crtc->planes[0], - mtk_crtc->layer_nr > 1 ? 
&mtk_crtc->planes[1] : - NULL, pipe); + ret = mtk_drm_crtc_init(drm_dev, mtk_crtc, pipe); if (ret < 0) return ret; - drm_mode_crtc_set_gamma_size(&mtk_crtc->base, MTK_LUT_SIZE); - drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, false, MTK_LUT_SIZE); + + if (gamma_lut_size) + drm_mode_crtc_set_gamma_size(&mtk_crtc->base, gamma_lut_size); + drm_crtc_enable_color_mgmt(&mtk_crtc->base, 0, false, gamma_lut_size); priv->num_pipes++; return 0; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 7f21307cda75..c26b2faa15cf 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -358,6 +358,7 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node, /* Only DMA capable components need the LARB property */ comp->larb_dev = NULL; if (type != MTK_DISP_OVL && + type != MTK_DISP_OVL_2L && type != MTK_DISP_RDMA && type != MTK_DISP_WDMA) return 0; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index b02e2042547f..7d9e63e20ded 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -753,11 +753,18 @@ static int a5xx_hw_init(struct msm_gpu *gpu) gpu->funcs->flush(gpu, gpu->rb[0]); if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; - } else { - /* Print a warning so if we die, we know why */ + } else if (ret == -ENODEV) { + /* + * This device does not use zap shader (but print a warning + * just in case someone got their dt wrong.. hopefully they + * have a debug UART to realize the error of their ways... + * if you mess this up you are about to crash horribly) + */ dev_warn_once(gpu->dev->dev, "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0); + } else { + return ret; } /* Last step - yield the ringbuffer */ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index dc8ec2c94301..686c34d706b0 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -537,12 +537,19 @@ static int a6xx_hw_init(struct msm_gpu *gpu) a6xx_flush(gpu, gpu->rb[0]); if (!a6xx_idle(gpu, gpu->rb[0])) return -EINVAL; - } else { - /* Print a warning so if we die, we know why */ + } else if (ret == -ENODEV) { + /* + * This device does not use zap shader (but print a warning + * just in case someone got their dt wrong.. hopefully they + * have a debug UART to realize the error of their ways... 
+ * if you mess this up you are about to crash horribly) + */ dev_warn_once(gpu->dev->dev, "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n"); gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0); ret = 0; + } else { + return ret; } out: diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index 24ab6249083a..6f420cc73dbd 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -255,13 +255,13 @@ static const struct dpu_format dpu_format_map[] = { INTERLEAVED_RGB_FMT(RGB565, 0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT, - C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3, + C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3, false, 2, 0, DPU_FETCH_LINEAR, 1), INTERLEAVED_RGB_FMT(BGR565, 0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT, - C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3, + C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3, false, 2, 0, DPU_FETCH_LINEAR, 1), diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c index 772f0753ed38..aaf2f26f8505 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_dsi_encoder.c @@ -121,7 +121,7 @@ static void mdp4_dsi_encoder_enable(struct drm_encoder *encoder) if (mdp4_dsi_encoder->enabled) return; - mdp4_crtc_set_config(encoder->crtc, + mdp4_crtc_set_config(encoder->crtc, MDP4_DMA_CONFIG_PACK_ALIGN_MSB | MDP4_DMA_CONFIG_DEFLKR_EN | MDP4_DMA_CONFIG_DITHER_EN | diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 05cc04f729d6..e1cc541e0ef2 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -1109,8 +1109,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion, msecs_to_jiffies(50)); if (ret == 0) - dev_warn(dev->dev, "pp done time out, lm=%d\n", - mdp5_cstate->pipeline.mixer->lm); + dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n", + mdp5_cstate->pipeline.mixer->lm); } static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 271aa7bbca92..73127948f54d 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -336,7 +336,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return num; } -static int dsi_mgr_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int id = dsi_mgr_connector_get_id(connector); @@ -479,6 +479,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; + struct msm_dsi_pll *src_pll; bool is_dual_dsi = IS_DUAL_DSI(); int ret; @@ -519,6 +520,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + /* Save PLL status if it is a clock source */ + src_pll = msm_dsi_phy_get_pll(msm_dsi->phy); + msm_dsi_pll_save_state(src_pll); + ret = msm_dsi_host_power_off(host); if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index b0cfa67d2a57..f509ebd77500 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c 
@@ -724,10 +724,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) if (!phy || !phy->cfg->ops.disable) return; - /* Save PLL status if it is a clock source */ - if (phy->usecase != MSM_DSI_PHY_SLAVE) - msm_dsi_pll_save_state(phy->pll); - phy->cfg->ops.disable(phy); dsi_phy_regulator_disable(phy); diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c index 8f6100db90ed..aa9385d5bfff 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c @@ -411,6 +411,12 @@ static int dsi_pll_10nm_vco_prepare(struct clk_hw *hw) if (pll_10nm->slave) dsi_pll_enable_pll_bias(pll_10nm->slave); + rc = dsi_pll_10nm_vco_set_rate(hw, pll_10nm->vco_current_rate, 0); + if (rc) { + pr_err("vco_set_rate failed, rc=%d\n", rc); + return rc; + } + /* Start PLL */ pll_write(pll_10nm->phy_cmn_mmio + REG_DSI_10nm_PHY_CMN_PLL_CNTRL, 0x01); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index c84f0a8b3f2c..b73fbb65e14b 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -441,6 +441,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) if (ret) goto err_msm_uninit; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + msm_gem_shrinker_init(ddev); switch (get_mdp_ver(pdev)) { diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 5193b6257061..b856e87574fd 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -451,6 +451,8 @@ nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) asyw->clr.ntfy = armw->ntfy.handle != 0; asyw->clr.sema = armw->sema.handle != 0; asyw->clr.xlut = armw->xlut.handle != 0; + if (asyw->clr.xlut && asyw->visible) + asyw->set.xlut = asyw->xlut.handle != 0; asyw->clr.csc = armw->csc.valid; if (wndw->func->image_clr) asyw->clr.image = armw->image.handle[0] != 0; diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index fa1439941596..0ad5d87b5a8e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -635,10 +635,10 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, unsigned long c, i; int ret = -ENOMEM; - args.src = kcalloc(max, sizeof(args.src), GFP_KERNEL); + args.src = kcalloc(max, sizeof(*args.src), GFP_KERNEL); if (!args.src) goto out; - args.dst = kcalloc(max, sizeof(args.dst), GFP_KERNEL); + args.dst = kcalloc(max, sizeof(*args.dst), GFP_KERNEL); if (!args.dst) goto out_free_src; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 9118df035b28..70bb6bb97af8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -156,7 +156,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify) fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (nouveau_fence_update(fence->channel, fctx)) + if (nouveau_fence_update(chan, fctx)) ret = NVIF_NOTIFY_DROP; } spin_unlock_irqrestore(&fctx->lock, flags); diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 77a0c6ad3cef..7ca0a2498532 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -63,14 +63,12 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, { struct nouveau_bo *nvbo = nouveau_bo(bo); struct nouveau_drm *drm = nouveau_bdev(bo->bdev); - struct nouveau_mem *mem; int ret; if (drm->client.device.info.ram_size == 0) return -ENOMEM; ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); - mem = nouveau_mem(reg); if (ret) return ret; @@ -103,11 +101,9 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, { struct nouveau_bo *nvbo = nouveau_bo(bo); struct nouveau_drm *drm = nouveau_bdev(bo->bdev); - struct nouveau_mem *mem; int ret; ret = nouveau_mem_new(&drm->master, nvbo->kind, nvbo->comp, reg); - mem = nouveau_mem(reg); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/core/memory.c b/drivers/gpu/drm/nouveau/nvkm/core/memory.c index e85a08ecd9da..4cc186262d34 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/memory.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/memory.c @@ -91,8 +91,8 @@ nvkm_memory_tags_get(struct nvkm_memory *memory, struct nvkm_device *device, } refcount_set(&tags->refcount, 1); + *ptags = memory->tags = tags; mutex_unlock(&fb->subdev.mutex); - *ptags = tags; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index bcf32d92ee5a..50e3539f33d2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -74,6 +74,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) if (debug > subdev->debug) return; + if (!mthd) + return; for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) { u32 base = chan->head * mthd->addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index 500cb08dd608..b57ab5cea9a1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, nent = (fuc.size / sizeof(struct gk20a_fw_av)); - pack = vzalloc((sizeof(*pack) * max_classes) + - (sizeof(*init) * (nent + 1))); + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + + (sizeof(*init) * (nent + max_classes + 1))); if (!pack) { ret = -ENOMEM; goto end; } - init = (void *)(pack + max_classes); + init = (void *)(pack + max_classes + 1); - for (i = 0; i < nent; i++) { - struct gf100_gr_init *ent = &init[i]; + for (i = 0; i < nent; i++, init++) { struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; if (prevclass != class) { - pack[classidx].init = ent; + if (prevclass) /* Add terminator to the method list. 
*/ + init++; + pack[classidx].init = init; pack[classidx].type = class; prevclass = class; if (++classidx >= max_classes) { @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, } } - ent->addr = addr; - ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + init->addr = addr; + init->data = av->data; + init->count = 1; + init->pitch = 1; } *ppack = pack; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c index ca251560d3e0..bb4a4266897c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c @@ -146,6 +146,7 @@ nvkm_fault_dtor(struct nvkm_subdev *subdev) struct nvkm_fault *fault = nvkm_fault(subdev); int i; + nvkm_notify_fini(&fault->nrpfb); nvkm_event_fini(&fault->event); for (i = 0; i < fault->buffer_nr; i++) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index df8b919dcf09..ace6fefba428 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -108,6 +108,7 @@ gm20b_secboot_new(struct nvkm_device *device, int index, struct gm200_secboot *gsb; struct nvkm_acr *acr; + *psb = NULL; acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) | BIT(NVKM_SECBOOT_FALCON_PMU)); if (IS_ERR(acr)) @@ -116,10 +117,8 @@ gm20b_secboot_new(struct nvkm_device *device, int index, acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); - if (!gsb) { - psb = NULL; + if (!gsb) return -ENOMEM; - } *psb = &gsb->base; ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base); diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 5d487686d25c..72f69709f349 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2061,6 +2061,40 @@ static const struct drm_display_mode mitsubishi_aa070mc01_mode = { .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC, }; +static const struct drm_display_mode logicpd_type_28_mode = { + .clock = 9000, + .hdisplay = 480, + .hsync_start = 480 + 3, + .hsync_end = 480 + 3 + 42, + .htotal = 480 + 3 + 42 + 2, + + .vdisplay = 272, + .vsync_start = 272 + 2, + .vsync_end = 272 + 2 + 11, + .vtotal = 272 + 2 + 11 + 3, + .vrefresh = 60, + .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, +}; + +static const struct panel_desc logicpd_type_28 = { + .modes = &logicpd_type_28_mode, + .num_modes = 1, + .bpc = 8, + .size = { + .width = 105, + .height = 67, + }, + .delay = { + .prepare = 200, + .enable = 200, + .unprepare = 200, + .disable = 200, + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE | + DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE, +}; + static const struct panel_desc mitsubishi_aa070mc01 = { .modes = &mitsubishi_aa070mc01_mode, .num_modes = 1, @@ -3287,6 +3321,9 @@ static const struct of_device_id platform_of_match[] = { }, { .compatible = "lg,lp129qe", .data = &lg_lp129qe, + }, { + .compatible = "logicpd,type28", + .data = &logicpd_type_28, }, { .compatible = "mitsubishi,aa070mc01-ca1", .data = &mitsubishi_aa070mc01, diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 88b431a267af..273d67e251c2 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -166,6 +166,7 @@ panfrost_lookup_bos(struct drm_device *dev, 
break; } + atomic_inc(&bo->gpu_usecount); job->mappings[i] = mapping; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index ca1bc9019600..b3517ff9630c 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -30,6 +30,12 @@ struct panfrost_gem_object { struct mutex lock; } mappings; + /* + * Count the number of jobs referencing this BO so we don't let the + * shrinker reclaim this object prematurely. + */ + atomic_t gpu_usecount; + bool noexec :1; bool is_heap :1; }; diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index f5dd7b29bc95..288e46c40673 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -41,6 +41,9 @@ static bool panfrost_gem_purge(struct drm_gem_object *obj) struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); struct panfrost_gem_object *bo = to_panfrost_bo(obj); + if (atomic_read(&bo->gpu_usecount)) + return false; + if (!mutex_trylock(&shmem->pages_lock)) return false; diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index e364ee00f3d0..4d383831c1fc 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -269,8 +269,13 @@ static void panfrost_job_cleanup(struct kref *ref) dma_fence_put(job->render_done_fence); if (job->mappings) { - for (i = 0; i < job->bo_count; i++) + for (i = 0; i < job->bo_count; i++) { + if (!job->mappings[i]) + break; + + atomic_dec(&job->mappings[i]->obj->gpu_usecount); panfrost_gem_mapping_put(job->mappings[i]); + } kvfree(job->mappings); } diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 763cfca886a7..5d75f8cf6477 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -151,7 +151,12 @@ u32 panfrost_mmu_as_get(struct panfrost_device *pfdev, struct panfrost_mmu *mmu) as = mmu->as; if (as >= 0) { int en = atomic_inc_return(&mmu->as_count); - WARN_ON(en >= NUM_JOB_SLOTS); + + /* + * AS can be retained by active jobs or a perfcnt context, + * hence the '+ 1' here. + */ + WARN_ON(en >= (NUM_JOB_SLOTS + 1)); list_move(&mmu->list, &pfdev->as_lru_list); goto out; @@ -596,33 +601,27 @@ static irqreturn_t panfrost_mmu_irq_handler_thread(int irq, void *data) source_id = (fault_status >> 16); /* Page fault only */ - if ((status & mask) == BIT(i)) { - WARN_ON(exception_type < 0xC1 || exception_type > 0xC4); - + ret = -1; + if ((status & mask) == BIT(i) && (exception_type & 0xF8) == 0xC0) ret = panfrost_mmu_map_fault_addr(pfdev, i, addr); - if (!ret) { - mmu_write(pfdev, MMU_INT_CLEAR, BIT(i)); - status &= ~mask; - continue; - } - } - /* terminal fault, print info about the fault */ - dev_err(pfdev->dev, - "Unhandled Page fault in AS%d at VA 0x%016llX\n" - "Reason: %s\n" - "raw fault status: 0x%X\n" - "decoded fault status: %s\n" - "exception type 0x%X: %s\n" - "access type 0x%X: %s\n" - "source id 0x%X\n", - i, addr, - "TODO", - fault_status, - (fault_status & (1 << 10) ? 
"DECODER FAULT" : "SLAVE FAULT"), - exception_type, panfrost_exception_name(pfdev, exception_type), - access_type, access_type_name(pfdev, fault_status), - source_id); + if (ret) + /* terminal fault, print info about the fault */ + dev_err(pfdev->dev, + "Unhandled Page fault in AS%d at VA 0x%016llX\n" + "Reason: %s\n" + "raw fault status: 0x%X\n" + "decoded fault status: %s\n" + "exception type 0x%X: %s\n" + "access type 0x%X: %s\n" + "source id 0x%X\n", + i, addr, + "TODO", + fault_status, + (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), + exception_type, panfrost_exception_name(pfdev, exception_type), + access_type, access_type_name(pfdev, fault_status), + source_id); mmu_write(pfdev, MMU_INT_CLEAR, mask); diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c index 684820448be3..6913578d5aa7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c +++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c @@ -73,7 +73,7 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, struct panfrost_file_priv *user = file_priv->driver_priv; struct panfrost_perfcnt *perfcnt = pfdev->perfcnt; struct drm_gem_shmem_object *bo; - u32 cfg; + u32 cfg, as; int ret; if (user == perfcnt->user) @@ -126,12 +126,8 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev, perfcnt->user = user; - /* - * Always use address space 0 for now. - * FIXME: this needs to be updated when we start using different - * address space. - */ - cfg = GPU_PERFCNT_CFG_AS(0) | + as = panfrost_mmu_as_get(pfdev, perfcnt->mapping->mmu); + cfg = GPU_PERFCNT_CFG_AS(as) | GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL); /* @@ -195,6 +191,7 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev, drm_gem_shmem_vunmap(&perfcnt->mapping->obj->base.base, perfcnt->buf); perfcnt->buf = NULL; panfrost_gem_close(&perfcnt->mapping->obj->base.base, file_priv); + panfrost_mmu_as_put(pfdev, perfcnt->mapping->mmu); panfrost_gem_mapping_put(perfcnt->mapping); perfcnt->mapping = NULL; pm_runtime_mark_last_busy(pfdev->dev); diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c index 611cbe7aee69..bfc1631093e9 100644 --- a/drivers/gpu/drm/qxl/qxl_kms.c +++ b/drivers/gpu/drm/qxl/qxl_kms.c @@ -184,7 +184,7 @@ int qxl_device_init(struct qxl_device *qdev, if (!qxl_check_device(qdev)) { r = -ENODEV; - goto surface_mapping_free; + goto rom_unmap; } r = qxl_bo_init(qdev); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index e81b01f8db90..0826efd9b5f5 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -127,6 +127,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id); + msleep(10); + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset, (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) | NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS))); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index fd74e2611185..8696af1ee14d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -325,6 +326,7 @@ static int radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned long flags = 0; + struct drm_device *dev; int ret; if (!ent) @@ -365,7 +367,44 @@ static int radeon_pci_probe(struct pci_dev *pdev, if (ret) return ret; - return 
drm_get_pci_dev(pdev, ent, &kms_driver); + dev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) + goto err_free; + + dev->pdev = pdev; +#ifdef __alpha__ + dev->hose = pdev->sysdata; +#endif + + pci_set_drvdata(pdev, dev); + + if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP)) + dev->agp = drm_agp_init(dev); + if (dev->agp) { + dev->agp->agp_mtrr = arch_phys_wc_add( + dev->agp->agp_info.aper_base, + dev->agp->agp_info.aper_size * + 1024 * 1024); + } + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_agp; + + return 0; + +err_agp: + if (dev->agp) + arch_phys_wc_del(dev->agp->agp_mtrr); + kfree(dev->agp); + pci_disable_device(pdev); +err_free: + drm_dev_put(dev); + return ret; } static void @@ -575,7 +614,7 @@ radeon_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | DRIVER_GEM | DRIVER_RENDER, + DRIVER_GEM | DRIVER_RENDER, .load = radeon_driver_load_kms, .open = radeon_driver_open_kms, .postclose = radeon_driver_postclose_kms, diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index e85c554eeaa9..2bb0187c5bc7 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -77,6 +78,11 @@ void radeon_driver_unload_kms(struct drm_device *dev) radeon_modeset_fini(rdev); radeon_device_fini(rdev); + if (dev->agp) + arch_phys_wc_del(dev->agp->agp_mtrr); + kfree(dev->agp); + dev->agp = NULL; + done_free: kfree(rdev); dev->dev_private = NULL; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 0d59f390de19..662d8075f411 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -542,6 +542,7 @@ static int rcar_du_properties_init(struct rcar_du_device *rcdu) static int rcar_du_vsps_init(struct rcar_du_device *rcdu) { const struct device_node *np = rcdu->dev->of_node; + const char *vsps_prop_name = "renesas,vsps"; struct of_phandle_args args; struct { struct device_node *np; @@ -557,15 +558,21 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) * entry contains a pointer to the VSP DT node and a bitmask of the * connected DU CRTCs. */ - cells = of_property_count_u32_elems(np, "vsps") / rcdu->num_crtcs - 1; + ret = of_property_count_u32_elems(np, vsps_prop_name); + if (ret < 0) { + /* Backward compatibility with old DTBs. */ + vsps_prop_name = "vsps"; + ret = of_property_count_u32_elems(np, vsps_prop_name); + } + cells = ret / rcdu->num_crtcs - 1; if (cells > 1) return -EINVAL; for (i = 0; i < rcdu->num_crtcs; ++i) { unsigned int j; - ret = of_parse_phandle_with_fixed_args(np, "vsps", cells, i, - &args); + ret = of_parse_phandle_with_fixed_args(np, vsps_prop_name, + cells, i, &args); if (ret < 0) goto error; @@ -587,8 +594,8 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) /* * Store the VSP pointer and pipe index in the CRTC. If the - * second cell of the 'vsps' specifier isn't present, default - * to 0 to remain compatible with older DT bindings. + * second cell of the 'renesas,vsps' specifier isn't present, + * default to 0 to remain compatible with older DT bindings. */ rcdu->crtcs[i].vsp = &rcdu->vsps[j]; rcdu->crtcs[i].vsp_pipe = cells >= 1 ? 
args.args[0] : 0; diff --git a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h index 6d61a0eb5d64..84e6bc050bf2 100644 --- a/drivers/gpu/drm/selftests/drm_cmdline_selftests.h +++ b/drivers/gpu/drm/selftests/drm_cmdline_selftests.h @@ -53,6 +53,7 @@ cmdline_test(drm_cmdline_test_rotate_0) cmdline_test(drm_cmdline_test_rotate_90) cmdline_test(drm_cmdline_test_rotate_180) cmdline_test(drm_cmdline_test_rotate_270) +cmdline_test(drm_cmdline_test_rotate_multiple) cmdline_test(drm_cmdline_test_rotate_invalid_val) cmdline_test(drm_cmdline_test_rotate_truncated) cmdline_test(drm_cmdline_test_hmirror) diff --git a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c index 013de9d27c35..035f86c5d648 100644 --- a/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c +++ b/drivers/gpu/drm/selftests/test-drm_cmdline_parser.c @@ -856,6 +856,17 @@ static int drm_cmdline_test_rotate_270(void *ignored) return 0; } +static int drm_cmdline_test_rotate_multiple(void *ignored) +{ + struct drm_cmdline_mode mode = { }; + + FAIL_ON(drm_mode_parse_command_line_for_connector("720x480,rotate=0,rotate=90", + &no_connector, + &mode)); + + return 0; +} + static int drm_cmdline_test_rotate_invalid_val(void *ignored) { struct drm_cmdline_mode mode = { }; @@ -888,7 +899,7 @@ static int drm_cmdline_test_hmirror(void *ignored) FAIL_ON(!mode.specified); FAIL_ON(mode.xres != 720); FAIL_ON(mode.yres != 480); - FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_X); + FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_X)); FAIL_ON(mode.refresh_specified); @@ -913,7 +924,7 @@ static int drm_cmdline_test_vmirror(void *ignored) FAIL_ON(!mode.specified); FAIL_ON(mode.xres != 720); FAIL_ON(mode.yres != 480); - FAIL_ON(mode.rotation_reflection != DRM_MODE_REFLECT_Y); + FAIL_ON(mode.rotation_reflection != (DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y)); FAIL_ON(mode.refresh_specified); diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index a5757b11b730..5b54eff12cc0 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -85,7 +85,6 @@ static int sun4i_drv_bind(struct device *dev) } drm_mode_config_init(drm); - drm->mode_config.allow_fb_modifiers = true; ret = component_bind_all(drm->dev, drm); if (ret) { diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c index 8b803eb903b8..18b4881f4481 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.c +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c @@ -106,48 +106,128 @@ static const struct de2_fmt_info de2_formats[] = { .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XRGB4444, + .de2_fmt = SUN8I_MIXER_FBFMT_ARGB4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_ABGR4444, .de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XBGR4444, + .de2_fmt = SUN8I_MIXER_FBFMT_ABGR4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_RGBA4444, .de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_RGBX4444, + .de2_fmt = SUN8I_MIXER_FBFMT_RGBA4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_BGRA4444, .de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444, .rgb = true, 
.csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_BGRX4444, + .de2_fmt = SUN8I_MIXER_FBFMT_BGRA4444, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_ARGB1555, .de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XRGB1555, + .de2_fmt = SUN8I_MIXER_FBFMT_ARGB1555, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_ABGR1555, .de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_XBGR1555, + .de2_fmt = SUN8I_MIXER_FBFMT_ABGR1555, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_RGBA5551, .de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_RGBX5551, + .de2_fmt = SUN8I_MIXER_FBFMT_RGBA5551, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_BGRA5551, .de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551, .rgb = true, .csc = SUN8I_CSC_MODE_OFF, }, + { + /* for DE2 VI layer which ignores alpha */ + .drm_fmt = DRM_FORMAT_BGRX5551, + .de2_fmt = SUN8I_MIXER_FBFMT_BGRA5551, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_ARGB2101010, + .de2_fmt = SUN8I_MIXER_FBFMT_ARGB2101010, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_ABGR2101010, + .de2_fmt = SUN8I_MIXER_FBFMT_ABGR2101010, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_RGBA1010102, + .de2_fmt = SUN8I_MIXER_FBFMT_RGBA1010102, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, + { + .drm_fmt = DRM_FORMAT_BGRA1010102, + .de2_fmt = SUN8I_MIXER_FBFMT_BGRA1010102, + .rgb = true, + .csc = SUN8I_CSC_MODE_OFF, + }, { .drm_fmt = DRM_FORMAT_UYVY, .de2_fmt = SUN8I_MIXER_FBFMT_UYVY, @@ -196,12 +276,6 @@ static const struct de2_fmt_info de2_formats[] = { .rgb = false, .csc = SUN8I_CSC_MODE_YUV2RGB, }, - { - .drm_fmt = DRM_FORMAT_YUV444, - .de2_fmt = SUN8I_MIXER_FBFMT_RGB888, - .rgb = true, - .csc = SUN8I_CSC_MODE_YUV2RGB, - }, { .drm_fmt = DRM_FORMAT_YUV422, .de2_fmt = SUN8I_MIXER_FBFMT_YUV422, @@ -220,12 +294,6 @@ static const struct de2_fmt_info de2_formats[] = { .rgb = false, .csc = SUN8I_CSC_MODE_YUV2RGB, }, - { - .drm_fmt = DRM_FORMAT_YVU444, - .de2_fmt = SUN8I_MIXER_FBFMT_RGB888, - .rgb = true, - .csc = SUN8I_CSC_MODE_YVU2RGB, - }, { .drm_fmt = DRM_FORMAT_YVU422, .de2_fmt = SUN8I_MIXER_FBFMT_YUV422, @@ -244,6 +312,18 @@ static const struct de2_fmt_info de2_formats[] = { .rgb = false, .csc = SUN8I_CSC_MODE_YVU2RGB, }, + { + .drm_fmt = DRM_FORMAT_P010, + .de2_fmt = SUN8I_MIXER_FBFMT_P010_YUV, + .rgb = false, + .csc = SUN8I_CSC_MODE_YUV2RGB, + }, + { + .drm_fmt = DRM_FORMAT_P210, + .de2_fmt = SUN8I_MIXER_FBFMT_P210_YUV, + .rgb = false, + .csc = SUN8I_CSC_MODE_YUV2RGB, + }, }; const struct de2_fmt_info *sun8i_mixer_format_info(u32 format) diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h index c6cc94057faf..345b28b0a80a 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.h +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h @@ -93,6 +93,10 @@ #define SUN8I_MIXER_FBFMT_ABGR1555 17 #define SUN8I_MIXER_FBFMT_RGBA5551 18 #define SUN8I_MIXER_FBFMT_BGRA5551 19 +#define SUN8I_MIXER_FBFMT_ARGB2101010 20 +#define SUN8I_MIXER_FBFMT_ABGR2101010 21 +#define SUN8I_MIXER_FBFMT_RGBA1010102 22 +#define SUN8I_MIXER_FBFMT_BGRA1010102 23 
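/*
 * Aside (illustration, not part of the patch): several DRM fourccs in
 * the de2_formats[] additions above share one hardware FBFMT code, the
 * X variants reusing their alpha siblings' code on layers that ignore
 * alpha, so a lookup by DRM format has to scan the table rather than
 * index it. A minimal sketch of such a lookup, borrowing the
 * de2_fmt_info/de2_formats names from the hunks above (assumes the
 * surrounding driver context for ARRAY_SIZE and the type definitions):
 */
static const struct de2_fmt_info *demo_format_info(u32 drm_fmt)
{
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(de2_formats); i++)
                if (de2_formats[i].drm_fmt == drm_fmt)
                        return &de2_formats[i];

        return NULL; /* format not handled by this mixer */
}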
#define SUN8I_MIXER_FBFMT_YUYV 0 #define SUN8I_MIXER_FBFMT_UYVY 1 @@ -109,6 +113,13 @@ /* format 12 is semi-planar YUV411 UVUV */ /* format 13 is semi-planar YUV411 VUVU */ #define SUN8I_MIXER_FBFMT_YUV411 14 +/* format 15 doesn't exist */ +/* format 16 is P010 YVU */ +#define SUN8I_MIXER_FBFMT_P010_YUV 17 +/* format 18 is P210 YVU */ +#define SUN8I_MIXER_FBFMT_P210_YUV 19 +/* format 20 is packed YVU444 10-bit */ +/* format 21 is packed YUV444 10-bit */ /* * Sub-engines listed below are unused for now. The EN registers are here only diff --git a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c index 42d445d23773..b8398ca18b0f 100644 --- a/drivers/gpu/drm/sun4i/sun8i_vi_layer.c +++ b/drivers/gpu/drm/sun4i/sun8i_vi_layer.c @@ -398,24 +398,66 @@ static const struct drm_plane_funcs sun8i_vi_layer_funcs = { }; /* - * While all RGB formats are supported, VI planes don't support - * alpha blending, so there is no point having formats with alpha - * channel if their opaque analog exist. + * While the DE2 VI layer supports the same RGB formats as the UI layer, + * the alpha channel is ignored. This structure lists all unique variants + * where the alpha channel is replaced with a "don't care" (X) channel. */ static const u32 sun8i_vi_layer_formats[] = { + DRM_FORMAT_BGR565, + DRM_FORMAT_BGR888, + DRM_FORMAT_BGRX4444, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_RGBX4444, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_XRGB8888, + + DRM_FORMAT_NV16, + DRM_FORMAT_NV12, + DRM_FORMAT_NV21, + DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_YUV411, + DRM_FORMAT_YUV420, + DRM_FORMAT_YUV422, + DRM_FORMAT_YVU411, + DRM_FORMAT_YVU420, + DRM_FORMAT_YVU422, +}; + +static const u32 sun8i_vi_layer_de3_formats[] = { DRM_FORMAT_ABGR1555, + DRM_FORMAT_ABGR2101010, DRM_FORMAT_ABGR4444, + DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB1555, + DRM_FORMAT_ARGB2101010, DRM_FORMAT_ARGB4444, + DRM_FORMAT_ARGB8888, DRM_FORMAT_BGR565, DRM_FORMAT_BGR888, + DRM_FORMAT_BGRA1010102, DRM_FORMAT_BGRA5551, DRM_FORMAT_BGRA4444, + DRM_FORMAT_BGRA8888, DRM_FORMAT_BGRX8888, DRM_FORMAT_RGB565, DRM_FORMAT_RGB888, + DRM_FORMAT_RGBA1010102, DRM_FORMAT_RGBA4444, DRM_FORMAT_RGBA5551, + DRM_FORMAT_RGBA8888, DRM_FORMAT_RGBX8888, DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, @@ -424,6 +466,8 @@ static const u32 sun8i_vi_layer_formats[] = { DRM_FORMAT_NV12, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_P010, + DRM_FORMAT_P210, DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, @@ -431,11 +475,9 @@ static const u32 sun8i_vi_layer_formats[] = { DRM_FORMAT_YUV411, DRM_FORMAT_YUV420, DRM_FORMAT_YUV422, - DRM_FORMAT_YUV444, DRM_FORMAT_YVU411, DRM_FORMAT_YVU420, DRM_FORMAT_YVU422, - DRM_FORMAT_YVU444, }; struct sun8i_vi_layer *sun8i_vi_layer_init_one(struct drm_device *drm, @@ -443,19 +485,27 @@ struct sun8i_vi_layer *sun8i_vi_layer_init_one(struct drm_device *drm, int index) { u32 supported_encodings, supported_ranges; + unsigned int plane_cnt, format_count; struct sun8i_vi_layer *layer; - unsigned int plane_cnt; + const u32 *formats; int ret; layer = devm_kzalloc(drm->dev, sizeof(*layer), GFP_KERNEL); if (!layer) return ERR_PTR(-ENOMEM); + if (mixer->cfg->is_de3) { + formats = sun8i_vi_layer_de3_formats; + format_count = ARRAY_SIZE(sun8i_vi_layer_de3_formats); + } else { + formats = sun8i_vi_layer_formats; + 
format_count = ARRAY_SIZE(sun8i_vi_layer_formats); + } + /* possible crtcs are set later */ ret = drm_universal_plane_init(drm, &layer->plane, 0, &sun8i_vi_layer_funcs, - sun8i_vi_layer_formats, - ARRAY_SIZE(sun8i_vi_layer_formats), + formats, format_count, NULL, DRM_PLANE_TYPE_OVERLAY, NULL); if (ret) { dev_err(drm->dev, "Couldn't initialize layer\n"); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index f455ce71e85d..b6aba7507043 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1037,23 +1037,9 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt, free_pages((unsigned long)virt, get_order(size)); } -static int host1x_drm_probe(struct host1x_device *dev) +static bool host1x_drm_wants_iommu(struct host1x_device *dev) { - struct drm_driver *driver = &tegra_drm_driver; struct iommu_domain *domain; - struct tegra_drm *tegra; - struct drm_device *drm; - int err; - - drm = drm_dev_alloc(driver, &dev->dev); - if (IS_ERR(drm)) - return PTR_ERR(drm); - - tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); - if (!tegra) { - err = -ENOMEM; - goto put; - } /* * If the Tegra DRM clients are backed by an IOMMU, push buffers are @@ -1082,9 +1068,38 @@ static int host1x_drm_probe(struct host1x_device *dev) * up the device tree appropriately. This is considered a problem * of integration, so care must be taken for the DT to be consistent. */ - domain = iommu_get_domain_for_dev(drm->dev->parent); + domain = iommu_get_domain_for_dev(dev->dev.parent); + + /* + * Tegra20 and Tegra30 don't support addressing memory beyond the + * 32-bit boundary, so the regular GATHER opcodes will always be + * sufficient and whether or not the host1x is attached to an IOMMU + * doesn't matter. + */ + if (!domain && dma_get_mask(dev->dev.parent) <= DMA_BIT_MASK(32)) + return true; + + return domain != NULL; +} + +static int host1x_drm_probe(struct host1x_device *dev) +{ + struct drm_driver *driver = &tegra_drm_driver; + struct tegra_drm *tegra; + struct drm_device *drm; + int err; + + drm = drm_dev_alloc(driver, &dev->dev); + if (IS_ERR(drm)) + return PTR_ERR(drm); + + tegra = kzalloc(sizeof(*tegra), GFP_KERNEL); + if (!tegra) { + err = -ENOMEM; + goto put; + } - if (domain && iommu_present(&platform_bus_type)) { + if (host1x_drm_wants_iommu(dev) && iommu_present(&platform_bus_type)) { tegra->domain = iommu_domain_alloc(&platform_bus_type); if (!tegra->domain) { err = -ENOMEM; diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index bc15b430156d..c46b4d4190ac 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -60,8 +60,16 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo, /* * If we've manually mapped the buffer object through the IOMMU, make * sure to return the IOVA address of our mapping. + * + * Similarly, for buffers that have been allocated by the DMA API the + * physical address can be used for devices that are not attached to + * an IOMMU. For these devices, callers must pass a valid pointer via + * the @phys argument. + * + * Imported buffers were also already mapped at import time, so the + * existing mapping can be reused. */ - if (phys && obj->mm) { + if (phys) { *phys = obj->iova; return NULL; } diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c index cadcdd9ea427..9ccfb56e9b01 100644 --- a/drivers/gpu/drm/tegra/plane.c +++ b/drivers/gpu/drm/tegra/plane.c @@ -3,6 +3,8 @@ * Copyright (C) 2017 NVIDIA CORPORATION. All rights reserved. 
*/ +#include + #include #include #include @@ -107,21 +109,27 @@ const struct drm_plane_funcs tegra_plane_funcs = { static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dc->dev); unsigned int i; int err; for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); + dma_addr_t phys_addr, *phys; + struct sg_table *sgt; - if (!dc->client.group) { - struct sg_table *sgt; + if (!domain || dc->client.group) + phys = &phys_addr; + else + phys = NULL; - sgt = host1x_bo_pin(dc->dev, &bo->base, NULL); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } + sgt = host1x_bo_pin(dc->dev, &bo->base, phys); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } + if (sgt) { err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (err == 0) { @@ -143,7 +151,7 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) state->iova[i] = sg_dma_address(sgt->sgl); state->sgt[i] = sgt; } else { - state->iova[i] = bo->iova; + state->iova[i] = phys_addr; } } @@ -156,9 +164,11 @@ static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state) struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); struct sg_table *sgt = state->sgt[i]; - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); + if (sgt) + dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + host1x_bo_unpin(dc->dev, &bo->base, sgt); state->iova[i] = DMA_MAPPING_ERROR; state->sgt[i] = NULL; } @@ -172,17 +182,13 @@ static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state) for (i = 0; i < state->base.fb->format->num_planes; i++) { struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i); + struct sg_table *sgt = state->sgt[i]; - if (!dc->client.group) { - struct sg_table *sgt = state->sgt[i]; - - if (sgt) { - dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, - DMA_TO_DEVICE); - host1x_bo_unpin(dc->dev, &bo->base, sgt); - } - } + if (sgt) + dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + host1x_bo_unpin(dc->dev, &bo->base, sgt); state->iova[i] = DMA_MAPPING_ERROR; state->sgt[i] = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 6b0883a1776e..b40915638e13 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -516,6 +516,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, fbo->base.base.resv = &fbo->base.base._resv; dma_resv_init(&fbo->base.base._resv); + fbo->base.base.dev = NULL; ret = dma_resv_trylock(&fbo->base.base._resv); WARN_ON(!ret); diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 5bd60ded3d81..909eba43664a 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -196,9 +196,10 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, return ERR_CAST(obj); ret = drm_gem_handle_create(file, &obj->base, handle); - drm_gem_object_put_unlocked(&obj->base); - if (ret) + if (ret) { + drm_gem_object_put_unlocked(&obj->base); return ERR_PTR(ret); + } return &obj->base; } @@ -221,7 +222,9 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, args->size = gem_object->size; args->pitch = pitch; - DRM_DEBUG("Created object of size %lld\n", size); + drm_gem_object_put_unlocked(gem_object); + + DRM_DEBUG("Created object of size %llu\n", args->size); 
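/*
 * Aside (illustration, not part of the patch): the vgem hunks above fix
 * GEM reference ownership. drm_gem_handle_create() takes its own
 * reference for the handle, so a create helper that returns the object
 * pointer must drop its local reference only on failure, and the caller
 * (here vgem_gem_dumb_create()) puts the pointer once it is done with
 * it. Sketch with a hypothetical demo_gem_alloc(); the drm_gem_* calls
 * are the real API of this kernel generation:
 */
#include <linux/err.h>
#include <drm/drm_device.h>
#include <drm/drm_file.h>
#include <drm/drm_gem.h>

struct drm_gem_object *demo_gem_alloc(struct drm_device *dev,
                                      unsigned long size); /* hypothetical */

static struct drm_gem_object *demo_gem_create(struct drm_device *dev,
                                              struct drm_file *file,
                                              u32 *handle,
                                              unsigned long size)
{
        struct drm_gem_object *obj;
        int ret;

        obj = demo_gem_alloc(dev, size); /* returns one reference */
        if (IS_ERR(obj))
                return obj;

        ret = drm_gem_handle_create(file, obj, handle);
        if (ret) {
                /* no handle was created: drop our only reference */
                drm_gem_object_put_unlocked(obj);
                return ERR_PTR(ret);
        }

        /* the handle holds its own reference; ours passes to the caller */
        return obj;
}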
return 0; } diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 0b56ba005e25..eedae2a7b532 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -38,6 +38,7 @@ #include #include #include +#include #define DRIVER_NAME "virtio_gpu" #define DRIVER_DESC "virtio GPU" @@ -312,13 +313,13 @@ void virtio_gpu_cmd_submit(struct virtio_gpu_device *vgdev, void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev, uint32_t ctx_id, uint64_t offset, uint32_t level, - struct virtio_gpu_box *box, + struct drm_virtgpu_3d_box *box, struct virtio_gpu_object_array *objs, struct virtio_gpu_fence *fence); void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, uint32_t ctx_id, uint64_t offset, uint32_t level, - struct virtio_gpu_box *box, + struct drm_virtgpu_3d_box *box, struct virtio_gpu_object_array *objs, struct virtio_gpu_fence *fence); void diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 9af1ec62434f..205ec4abae2b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -33,17 +33,6 @@ #include "virtgpu_drv.h" -static void convert_to_hw_box(struct virtio_gpu_box *dst, - const struct drm_virtgpu_3d_box *src) -{ - dst->x = cpu_to_le32(src->x); - dst->y = cpu_to_le32(src->y); - dst->z = cpu_to_le32(src->z); - dst->w = cpu_to_le32(src->w); - dst->h = cpu_to_le32(src->h); - dst->d = cpu_to_le32(src->d); -} - static int virtio_gpu_map_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -304,7 +293,6 @@ static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev, struct virtio_gpu_fence *fence; int ret; u32 offset = args->offset; - struct virtio_gpu_box box; if (vgdev->has_virgl_3d == false) return -ENOSYS; @@ -317,8 +305,6 @@ static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev, if (ret != 0) goto err_put_free; - convert_to_hw_box(&box, &args->box); - fence = virtio_gpu_fence_alloc(vgdev); if (!fence) { ret = -ENOMEM; @@ -326,7 +312,7 @@ static int virtio_gpu_transfer_from_host_ioctl(struct drm_device *dev, } virtio_gpu_cmd_transfer_from_host_3d (vgdev, vfpriv->ctx_id, offset, args->level, - &box, objs, fence); + &args->box, objs, fence); dma_fence_put(&fence->f); return 0; @@ -345,7 +331,6 @@ static int virtio_gpu_transfer_to_host_ioctl(struct drm_device *dev, void *data, struct drm_virtgpu_3d_transfer_to_host *args = data; struct virtio_gpu_object_array *objs; struct virtio_gpu_fence *fence; - struct virtio_gpu_box box; int ret; u32 offset = args->offset; @@ -353,11 +338,10 @@ static int virtio_gpu_transfer_to_host_ioctl(struct drm_device *dev, void *data, if (objs == NULL) return -ENOENT; - convert_to_hw_box(&box, &args->box); if (!vgdev->has_virgl_3d) { virtio_gpu_cmd_transfer_to_host_2d (vgdev, offset, - box.w, box.h, box.x, box.y, + args->box.w, args->box.h, args->box.x, args->box.y, objs, NULL); } else { ret = virtio_gpu_array_lock_resv(objs); @@ -372,7 +356,7 @@ static int virtio_gpu_transfer_to_host_ioctl(struct drm_device *dev, void *data, virtio_gpu_cmd_transfer_to_host_3d (vgdev, vfpriv ? 
vfpriv->ctx_id : 0, offset, - args->level, &box, objs, fence); + args->level, &args->box, objs, fence); dma_fence_put(&fence->f); } return 0; diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 017a9e0fc3bb..3af7ec80c7da 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -42,8 +42,8 @@ static int virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev, * "f91a9dd35715 Fix unlinking resources from hash * table." (Feb 2019) fixes the bug. */ - static int handle; - handle++; + static atomic_t seqno = ATOMIC_INIT(0); + int handle = atomic_inc_return(&seqno); *resid = handle + 1; } else { int handle = ida_alloc(&vgdev->resource_ida, GFP_KERNEL); @@ -99,6 +99,7 @@ struct drm_gem_object *virtio_gpu_create_object(struct drm_device *dev, return NULL; bo->base.base.funcs = &virtio_gpu_gem_funcs; + bo->base.map_cached = true; return &bo->base.base; } diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 74ad3bc3ebe8..9274c4063c70 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -40,6 +40,17 @@ + MAX_INLINE_CMD_SIZE \ + MAX_INLINE_RESP_SIZE) +static void convert_to_hw_box(struct virtio_gpu_box *dst, + const struct drm_virtgpu_3d_box *src) +{ + dst->x = cpu_to_le32(src->x); + dst->y = cpu_to_le32(src->y); + dst->z = cpu_to_le32(src->z); + dst->w = cpu_to_le32(src->w); + dst->h = cpu_to_le32(src->h); + dst->d = cpu_to_le32(src->d); +} + void virtio_gpu_ctrl_ack(struct virtqueue *vq) { struct drm_device *dev = vq->vdev->priv; @@ -965,7 +976,7 @@ virtio_gpu_cmd_resource_create_3d(struct virtio_gpu_device *vgdev, void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, uint32_t ctx_id, uint64_t offset, uint32_t level, - struct virtio_gpu_box *box, + struct drm_virtgpu_3d_box *box, struct virtio_gpu_object_array *objs, struct virtio_gpu_fence *fence) { @@ -987,7 +998,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D); cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id); cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); - cmd_p->box = *box; + convert_to_hw_box(&cmd_p->box, box); cmd_p->offset = cpu_to_le64(offset); cmd_p->level = cpu_to_le32(level); @@ -997,7 +1008,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev, void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev, uint32_t ctx_id, uint64_t offset, uint32_t level, - struct virtio_gpu_box *box, + struct drm_virtgpu_3d_box *box, struct virtio_gpu_object_array *objs, struct virtio_gpu_fence *fence) { @@ -1013,7 +1024,7 @@ void virtio_gpu_cmd_transfer_from_host_3d(struct virtio_gpu_device *vgdev, cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D); cmd_p->hdr.ctx_id = cpu_to_le32(ctx_id); cmd_p->resource_id = cpu_to_le32(bo->hw_res_handle); - cmd_p->box = *box; + convert_to_hw_box(&cmd_p->box, box); cmd_p->offset = cpu_to_le64(offset); cmd_p->level = cpu_to_le32(level); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 4ac55fc2bf97..44d858ce4ce7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -209,8 +209,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, cres->hash.key = user_key | (res_type << 24); ret = drm_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) + if 
(unlikely(ret != 0)) { + kfree(cres); goto out_invalid_key; + } cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 25ca54de8fc5..0d53c08e9972 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -8,6 +8,7 @@ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/host1x.h> +#include <linux/iommu.h> #include <linux/kref.h> #include <linux/module.h> #include <linux/scatterlist.h> @@ -101,9 +102,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { struct host1x_client *client = job->client; struct device *dev = client->dev; + struct iommu_domain *domain; unsigned int i; int err; + domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { @@ -117,7 +120,19 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - if (client->group) + /* + * If the client device is not attached to an IOMMU, the + * physical address of the buffer object can be used. + * + * Similarly, when an IOMMU domain is shared between all + * host1x clients, the IOVA is already available, so no + * need to map the buffer object again. + * + * XXX Note that this isn't always safe to do because it + * relies on an assumption that no cache maintenance is + * needed on the buffer objects. + */ + if (!domain || client->group) phys = &phys_addr; else phys = NULL; @@ -176,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; + dma_addr_t *phys; unsigned int j; g->bo = host1x_bo_get(g->bo); @@ -184,7 +200,17 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - sgt = host1x_bo_pin(host->dev, g->bo, NULL); + /* + * If the host1x is not attached to an IOMMU, there is no need + * to map the buffer object for the host1x, since the physical + * address can simply be used.
+ */ + if (!iommu_get_domain_for_dev(host->dev)) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(host->dev, g->bo, phys); if (IS_ERR(sgt)) { err = PTR_ERR(sgt); goto unpin; @@ -214,7 +240,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) job->unpins[job->num_unpins].size = gather_size; phys_addr = iova_dma_addr(&host->iova, alloc); - } else { + } else if (sgt) { err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE); if (!err) { diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index ae79a7c66737..fa704153cb00 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -730,7 +730,7 @@ static int alps_input_configured(struct hid_device *hdev, struct hid_input *hi) if (data->has_sp) { input2 = input_allocate_device(); if (!input2) { - input_free_device(input2); + ret = -ENOMEM; goto exit; } diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6ac8becc2372..d732d1d10caf 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -340,7 +340,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index 3f6abd190df4..db6da21ade06 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -174,6 +174,7 @@ static __u8 pid0902_rdesc_fixed[] = { struct bigben_device { struct hid_device *hid; struct hid_report *report; + bool removed; u8 led_state; /* LED1 = 1 .. 
LED4 = 8 */ u8 right_motor_on; /* right motor off/on 0/1 */ u8 left_motor_force; /* left motor force 0-255 */ @@ -190,6 +191,9 @@ static void bigben_worker(struct work_struct *work) struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; + if (bigben->removed) + return; + if (bigben->work_led) { bigben->work_led = false; report_field->value[0] = 0x01; /* 1 = led message */ @@ -220,10 +224,16 @@ static void bigben_worker(struct work_struct *work) static int hid_bigben_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { - struct bigben_device *bigben = data; + struct hid_device *hid = input_get_drvdata(dev); + struct bigben_device *bigben = hid_get_drvdata(hid); u8 right_motor_on; u8 left_motor_force; + if (!bigben) { + hid_err(hid, "no device data\n"); + return 0; + } + if (effect->type != FF_RUMBLE) return 0; @@ -298,8 +308,8 @@ static void bigben_remove(struct hid_device *hid) { struct bigben_device *bigben = hid_get_drvdata(hid); + bigben->removed = true; cancel_work_sync(&bigben->worker); - hid_hw_close(hid); hid_hw_stop(hid); } @@ -319,6 +329,7 @@ static int bigben_probe(struct hid_device *hid, return -ENOMEM; hid_set_drvdata(hid, bigben); bigben->hid = hid; + bigben->removed = false; error = hid_parse(hid); if (error) { @@ -341,10 +352,10 @@ static int bigben_probe(struct hid_device *hid, INIT_WORK(&bigben->worker, bigben_worker); - error = input_ff_create_memless(hidinput->input, bigben, + error = input_ff_create_memless(hidinput->input, NULL, hid_bigben_play_effect); if (error) - return error; + goto error_hw_stop; name_sz = strlen(dev_name(&hid->dev)) + strlen(":red:bigben#") + 1; @@ -354,8 +365,10 @@ static int bigben_probe(struct hid_device *hid, sizeof(struct led_classdev) + name_sz, GFP_KERNEL ); - if (!led) - return -ENOMEM; + if (!led) { + error = -ENOMEM; + goto error_hw_stop; + } name = (void *)(&led[1]); snprintf(name, name_sz, "%s:red:bigben%d", @@ -369,7 +382,7 @@ static int bigben_probe(struct hid_device *hid, bigben->leds[n] = led; error = devm_led_classdev_register(&hid->dev, led); if (error) - return error; + goto error_hw_stop; } /* initial state: LED1 is on, no rumble effect */ @@ -383,6 +396,10 @@ static int bigben_probe(struct hid_device *hid, hid_info(hid, "LED and force feedback support for BigBen gamepad\n"); return 0; + +error_hw_stop: + hid_hw_stop(hid); + return error; } static __u8 *bigben_report_fixup(struct hid_device *hid, __u8 *rdesc, diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 851fe54ea59e..359616e3efbb 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1741,7 +1741,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, rsize = ((report->size - 1) >> 3) + 1; - if (rsize > HID_MAX_BUFFER_SIZE) + if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) + rsize = HID_MAX_BUFFER_SIZE - 1; + else if (rsize > HID_MAX_BUFFER_SIZE) rsize = HID_MAX_BUFFER_SIZE; if (csize < rsize) { diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 2aa4ed157aec..85a054f1ce38 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -532,6 +532,8 @@ static const struct hid_device_id hammer_devices[] = { USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MOONBALL) }, { HID_DEVICE(BUS_USB, 
HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3a400ce603c4..9f2213426556 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -478,6 +478,7 @@ #define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030 #define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d +#define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f @@ -726,6 +727,7 @@ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 +#define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d #define USB_VENDOR_ID_LG 0x1fd2 #define USB_DEVICE_ID_LG_MULTITOUCH 0x0064 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index c436e12feb23..6c55682c5974 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -41,8 +41,9 @@ static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ - { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 0e7b2d998395..3735546bb524 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -103,6 +103,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS), HID_QUIRK_NOGET }, diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index d31ea82b84c1..a66f08041a1a 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -341,6 +341,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index a970b809d778..4140dea693e9 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -932,9 +932,9 @@ void hiddev_disconnect(struct hid_device *hid) hiddev->exist = 0; if (hiddev->open) { - mutex_unlock(&hiddev->existancelock); hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); + mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); 
kfree(hiddev); diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b155d0052981..a02ce43d778d 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1217,10 +1217,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int i, j; struct page *pg; - if (num_pages < alloc_unit) - return 0; - - for (i = 0; (i * alloc_unit) < num_pages; i++) { + for (i = 0; i < num_pages / alloc_unit; i++) { if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) > HV_HYP_PAGE_SIZE) return i * alloc_unit; @@ -1258,7 +1255,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, } - return num_pages; + return i * alloc_unit; } static void balloon_up(struct work_struct *dummy) @@ -1273,9 +1270,6 @@ static void balloon_up(struct work_struct *dummy) long avail_pages; unsigned long floor; - /* The host balloons pages in 2M granularity. */ - WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); - /* * We will attempt 2M allocations. However, if we fail to * allocate 2M chunks, we will go back to PAGE_SIZE allocations. @@ -1285,14 +1279,13 @@ static void balloon_up(struct work_struct *dummy) avail_pages = si_mem_available(); floor = compute_balloon_floor(); - /* Refuse to balloon below the floor, keep the 2M granularity. */ + /* Refuse to balloon below the floor. */ if (avail_pages < num_pages || avail_pages - num_pages < floor) { pr_warn("Balloon request will be partially fulfilled. %s\n", avail_pages < num_pages ? "Not enough memory." : "Balloon floor reached."); num_pages = avail_pages > floor ? (avail_pages - floor) : 0; - num_pages -= num_pages % PAGES_IN_2M; } while (!done) { diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 23dfe848979a..5ad41c2a0ea1 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -385,6 +385,16 @@ config SENSORS_ATXP1 This driver can also be built as a module. If so, the module will be called atxp1. +config SENSORS_DRIVETEMP + tristate "Hard disk drives with temperature sensors" + depends on SCSI && ATA + help + If you say yes you get support for the temperature sensor on + hard disk drives. + + This driver can also be built as a module. If so, the module + will be called drivetemp.
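For reference, once SENSORS_DRIVETEMP is enabled the readings appear through the standard hwmon sysfs ABI: the driver registers under the hwmon name "drivetemp" and reports temperatures in millidegrees Celsius (both visible in the drivetemp.c hunks below). A minimal userspace sketch, assuming nothing beyond that generic ABI; it is not part of this patch:

#include <dirent.h>
#include <stdio.h>
#include <string.h>

/* Print the temperature of every hwmon device named "drivetemp". */
int main(void)
{
	DIR *dir = opendir("/sys/class/hwmon");
	struct dirent *de;
	char path[288], name[32];
	long temp;
	FILE *f;

	if (!dir)
		return 1;
	while ((de = readdir(dir)) != NULL) {
		if (de->d_name[0] == '.')
			continue;
		snprintf(path, sizeof(path), "/sys/class/hwmon/%s/name", de->d_name);
		f = fopen(path, "r");
		if (!f)
			continue;
		if (!fgets(name, sizeof(name), f)) {
			fclose(f);
			continue;
		}
		fclose(f);
		if (strncmp(name, "drivetemp", 9) != 0)
			continue;
		snprintf(path, sizeof(path), "/sys/class/hwmon/%s/temp1_input", de->d_name);
		f = fopen(path, "r");
		if (f && fscanf(f, "%ld", &temp) == 1)	/* millidegrees C */
			printf("%s: %ld mdegC\n", de->d_name, temp);
		if (f)
			fclose(f);
	}
	closedir(dir);
	return 0;
}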
+ config SENSORS_DS620 tristate "Dallas Semiconductor DS620" depends on I2C diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 6db5db9cdc29..b32b4415bfad 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_SENSORS_DA9052_ADC)+= da9052-hwmon.o obj-$(CONFIG_SENSORS_DA9055)+= da9055-hwmon.o obj-$(CONFIG_SENSORS_DELL_SMM) += dell-smm-hwmon.o obj-$(CONFIG_SENSORS_DME1737) += dme1737.o +obj-$(CONFIG_SENSORS_DRIVETEMP) += drivetemp.o obj-$(CONFIG_SENSORS_DS620) += ds620.o obj-$(CONFIG_SENSORS_DS1621) += ds1621.o obj-$(CONFIG_SENSORS_EMC1403) += emc1403.o diff --git a/drivers/hwmon/acpi_power_meter.c b/drivers/hwmon/acpi_power_meter.c index 4cf25458f0b9..0db8ef4fd6e1 100644 --- a/drivers/hwmon/acpi_power_meter.c +++ b/drivers/hwmon/acpi_power_meter.c @@ -355,7 +355,9 @@ static ssize_t show_str(struct device *dev, struct acpi_device *acpi_dev = to_acpi_device(dev); struct acpi_power_meter_resource *resource = acpi_dev->driver_data; acpi_string val; + int ret; + mutex_lock(&resource->lock); switch (attr->index) { case 0: val = resource->model_number; @@ -372,8 +374,9 @@ static ssize_t show_str(struct device *dev, val = ""; break; } - - return sprintf(buf, "%s\n", val); + ret = sprintf(buf, "%s\n", val); + mutex_unlock(&resource->lock); + return ret; } static ssize_t show_val(struct device *dev, @@ -817,11 +820,12 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) resource = acpi_driver_data(device); - mutex_lock(&resource->lock); switch (event) { case METER_NOTIFY_CONFIG: + mutex_lock(&resource->lock); free_capabilities(resource); res = read_capabilities(resource); + mutex_unlock(&resource->lock); if (res) break; @@ -830,15 +834,12 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) break; case METER_NOTIFY_TRIP: sysfs_notify(&device->dev.kobj, NULL, POWER_AVERAGE_NAME); - update_meter(resource); break; case METER_NOTIFY_CAP: sysfs_notify(&device->dev.kobj, NULL, POWER_CAP_NAME); - update_cap(resource); break; case METER_NOTIFY_INTERVAL: sysfs_notify(&device->dev.kobj, NULL, POWER_AVG_INTERVAL_NAME); - update_avg_interval(resource); break; case METER_NOTIFY_CAPPING: sysfs_notify(&device->dev.kobj, NULL, POWER_ALARM_NAME); @@ -848,7 +849,6 @@ static void acpi_power_meter_notify(struct acpi_device *device, u32 event) WARN(1, "Unexpected event %d\n", event); break; } - mutex_unlock(&resource->lock); acpi_bus_generate_netlink_event(ACPI_POWER_METER_CLASS, dev_name(&device->dev), event, 0); @@ -912,8 +912,8 @@ static int acpi_power_meter_remove(struct acpi_device *device) resource = acpi_driver_data(device); hwmon_device_unregister(resource->hwmon_dev); - free_capabilities(resource); remove_attrs(resource); + free_capabilities(resource); kfree(resource); return 0; diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 9632e2e3c4bb..319a0519ebdb 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -413,7 +413,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which) return 0x95; break; } - return -ENODEV; + return 0; } /* Provide labels for sysfs */ diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index ec93b8d673f5..0433c77b2571 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -46,6 +46,7 @@ #define APPLESMC_MIN_WAIT 0x0010 #define APPLESMC_RETRY_WAIT 0x0100 #define APPLESMC_MAX_WAIT 0x20000 +#define APPLESMC_UDELAY_MAX 20000 #define APPLESMC_READ_CMD 0x10 #define APPLESMC_WRITE_CMD 0x11 @@ -157,14 +158,23 @@ 
static struct workqueue_struct *applesmc_led_wq; static int wait_read(void) { u8 status; - int us; - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) { + unsigned int us; + + for (us = APPLESMC_MIN_WAIT; us < APPLESMC_UDELAY_MAX; us <<= 1) { udelay(us); status = inb(APPLESMC_CMD_PORT); /* read: wait for smc to settle */ if (status & 0x01) return 0; } + /* switch to mdelay for longer sleeps */ + for (; us < APPLESMC_MAX_WAIT; us <<= 1) { + mdelay(us); + status = inb(APPLESMC_CMD_PORT); + /* read: wait for smc to settle */ + if (status & 0x01) + return 0; + } pr_warn("wait_read() fail: 0x%02x\n", status); return -EIO; @@ -177,10 +187,10 @@ static int wait_read(void) static int send_byte(u8 cmd, u16 port) { u8 status; - int us; + unsigned int us; outb(cmd, port); - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) { + for (us = APPLESMC_MIN_WAIT; us < APPLESMC_UDELAY_MAX; us <<= 1) { udelay(us); status = inb(APPLESMC_CMD_PORT); /* write: wait for smc to settle */ @@ -190,6 +200,23 @@ static int send_byte(u8 cmd, u16 port) if (status & 0x04) return 0; /* timeout: give up */ + if (us << 1 == APPLESMC_UDELAY_MAX) + break; + /* busy: long wait and resend */ + udelay(APPLESMC_RETRY_WAIT); + outb(cmd, port); + } + /* switch to mdelay for longer sleeps */ + for (; us < APPLESMC_MAX_WAIT; us <<= 1) { + mdelay(us); + status = inb(APPLESMC_CMD_PORT); + /* write: wait for smc to settle */ + if (status & 0x02) + continue; + /* ready: cmd accepted, return */ + if (status & 0x04) + return 0; + /* timeout: give up */ if (us << 1 == APPLESMC_MAX_WAIT) break; /* busy: long wait and resend */ diff --git a/drivers/hwmon/drivetemp.c b/drivers/hwmon/drivetemp.c new file mode 100644 index 000000000000..370d0c74eb01 --- /dev/null +++ b/drivers/hwmon/drivetemp.c @@ -0,0 +1,574 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hwmon client for disk and solid state drives with temperature sensors + * Copyright (C) 2019 Zodiac Inflight Innovations + * + * With input from: + * Hwmon client for S.M.A.R.T. hard disk drives with temperature sensors. + * (C) 2018 Linus Walleij + * + * hwmon: Driver for SCSI/ATA temperature sensors + * by Constantin Baranov , submitted September 2009 + * + * This driver supports reporting the temperature of SATA drives. It can be + * easily extended to report the temperature of SCSI drives. + * + * The primary means to read drive temperatures and temperature limits + * for ATA drives is the SCT Command Transport feature set as specified in + * ATA8-ACS. + * It can be used to read the current drive temperature, temperature limits, + * and historic minimum and maximum temperatures. The SCT Command Transport + * feature set is documented in "AT Attachment 8 - ATA/ATAPI Command Set + * (ATA8-ACS)". + * + * If the SCT Command Transport feature set is not available, drive temperatures + * may be readable through SMART attributes. Since SMART attributes are not well + * defined, this method is only used as a fallback mechanism. + * + * There are three SMART attributes which may report drive temperatures. + * Those are defined as follows (from + * http://www.cropel.com/library/smart-attribute-list.aspx). + * + * 190 Temperature Temperature, monitored by a sensor somewhere inside + * the drive. Raw value typically holds the actual + * temperature (hexadecimal) in its rightmost two digits. + * + * 194 Temperature Temperature, monitored by a sensor somewhere inside + * the drive. Raw value typically holds the actual + * temperature (hexadecimal) in its rightmost two digits.
+ * + * 231 Temperature Temperature, monitored by a sensor somewhere inside + * the drive. Raw value typically holds the actual + * temperature (hexadecimal) in its rightmost two digits. + * + * Wikipedia defines the attributes a bit differently. + * + * 190 Temperature Difference or Airflow Temperature: Value is equal to + * (100 - temp. °C), allowing the manufacturer to set a minimum threshold + * which corresponds to a maximum temperature. This also follows the + * convention of 100 being a best-case value and lower values being + * undesirable. However, some older drives may instead report raw + * Temperature (identical to 0xC2) or Temperature minus 50 here. + * 194 Temperature or Temperature Celsius: Indicates the device + * temperature, if the appropriate sensor is fitted. Lowest byte of the + * raw value contains the exact temperature value (Celsius degrees). + * 231 Life Left (SSDs) or Temperature: Indicates the approximate SSD + * life left, in terms of program/erase cycles or available reserved + * blocks. A normalized value of 100 represents a new drive, with a + * threshold value at 10 indicating a need for replacement. A value of + * 0 may mean that the drive is operating in read-only mode to allow + * data recovery. Previously (pre-2010) occasionally used for Drive + * Temperature (more typically reported at 0xC2). + * + * Common denominator is that the first raw byte reports the temperature + * in degrees C on almost all drives. Some drives may report a fractional + * temperature in the second raw byte. + * + * Known exceptions (from libatasmart): + * - SAMSUNG SV0412H and SAMSUNG SV1204H report the temperature in tenths + * of a degree C in the first two raw bytes. + * - A few Maxtor drives report an unknown or bad value in attribute 194. + * - Certain Apple SSD drives report an unknown value in attribute 190. + * Only certain firmware versions are affected. + * + * Those exceptions affect older ATA drives and are currently ignored. + * Also, the second raw byte (possibly reporting the fractional temperature) + * is currently ignored. + * + * Many drives also report temperature limits in additional SMART data raw + * bytes. The format of those is not well defined and varies widely. + * The driver does not currently attempt to report those limits. + * + * According to data in smartmontools, attribute 231 is rarely used to report + * drive temperatures. At the same time, several drives report SSD life left + * in attribute 231, but do not support temperature sensors. For this reason, + * attribute 231 is currently ignored. + * + * Following the above definitions, temperatures are reported as follows. + * - If SCT Command Transport is supported, it is used to read the + * temperature and, if available, temperature limits. + * - Otherwise, if SMART attribute 194 is supported, it is used to read + * the temperature. + * - Otherwise, if SMART attribute 190 is supported, it is used to read + * the temperature. + */
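The fallback rules above condense to a few lines of code. A sketch of just the SMART path (kernel types assumed; the 12-byte entry stride and byte offsets mirror drivetemp_get_smarttemp() further down):

static int smart_temp_mdegc(const u8 *buf, long *temp)
{
	bool have_temp = false;
	u8 temp_raw = 0;
	int i;

	for (i = 0; i < 30; i++) {		/* ATA_MAX_SMART_ATTRS */
		const u8 *attr = buf + i * 12;	/* 12-byte attribute entries */
		int id = attr[2];		/* attribute ID */

		if (!id)
			continue;
		if (id == 190) {		/* fallback: airflow temperature */
			temp_raw = attr[7];	/* first raw byte, degrees C */
			have_temp = true;
		}
		if (id == 194) {		/* preferred: device temperature */
			temp_raw = attr[7];
			have_temp = true;
			break;
		}
	}

	if (!have_temp)
		return -ENXIO;
	*temp = temp_raw * 1000;		/* hwmon units: millidegrees C */
	return 0;
}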
+ +#include <linux/ata.h> +#include <linux/bits.h> +#include <linux/device.h> +#include <linux/hwmon.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_driver.h> +#include <scsi/scsi_proto.h> + +struct drivetemp_data { + struct list_head list; /* list of instantiated devices */ + struct mutex lock; /* protect data buffer accesses */ + struct scsi_device *sdev; /* SCSI device */ + struct device *dev; /* instantiating device */ + struct device *hwdev; /* hardware monitoring device */ + u8 smartdata[ATA_SECT_SIZE]; /* local buffer */ + int (*get_temp)(struct drivetemp_data *st, u32 attr, long *val); + bool have_temp_lowest; /* lowest temp in SCT status */ + bool have_temp_highest; /* highest temp in SCT status */ + bool have_temp_min; /* have min temp */ + bool have_temp_max; /* have max temp */ + bool have_temp_lcrit; /* have lower critical limit */ + bool have_temp_crit; /* have critical limit */ + int temp_min; /* min temp */ + int temp_max; /* max temp */ + int temp_lcrit; /* lower critical limit */ + int temp_crit; /* critical limit */ +}; + +static LIST_HEAD(drivetemp_devlist); + +#define ATA_MAX_SMART_ATTRS 30 +#define SMART_TEMP_PROP_190 190 +#define SMART_TEMP_PROP_194 194 + +#define SCT_STATUS_REQ_ADDR 0xe0 +#define SCT_STATUS_VERSION_LOW 0 /* log byte offsets */ +#define SCT_STATUS_VERSION_HIGH 1 +#define SCT_STATUS_TEMP 200 +#define SCT_STATUS_TEMP_LOWEST 201 +#define SCT_STATUS_TEMP_HIGHEST 202 +#define SCT_READ_LOG_ADDR 0xe1 +#define SMART_READ_LOG 0xd5 +#define SMART_WRITE_LOG 0xd6 + +#define INVALID_TEMP 0x80 + +#define temp_is_valid(temp) ((temp) != INVALID_TEMP) +#define temp_from_sct(temp) (((s8)(temp)) * 1000) + +static inline bool ata_id_smart_supported(u16 *id) +{ + return id[ATA_ID_COMMAND_SET_1] & BIT(0); +} + +static inline bool ata_id_smart_enabled(u16 *id) +{ + return id[ATA_ID_CFS_ENABLE_1] & BIT(0); +} + +static int drivetemp_scsi_command(struct drivetemp_data *st, + u8 ata_command, u8 feature, + u8 lba_low, u8 lba_mid, u8 lba_high) +{ + u8 scsi_cmd[MAX_COMMAND_SIZE]; + int data_dir; + + memset(scsi_cmd, 0, sizeof(scsi_cmd)); + scsi_cmd[0] = ATA_16; + if (ata_command == ATA_CMD_SMART && feature == SMART_WRITE_LOG) { + scsi_cmd[1] = (5 << 1); /* PIO Data-out */ + /* + * No off.line or cc, write to dev, block count in sector count + * field. + */ + scsi_cmd[2] = 0x06; + data_dir = DMA_TO_DEVICE; + } else { + scsi_cmd[1] = (4 << 1); /* PIO Data-in */ + /* + * No off.line or cc, read from dev, block count in sector count + * field.
+ */ + scsi_cmd[2] = 0x0e; + data_dir = DMA_FROM_DEVICE; + } + scsi_cmd[4] = feature; + scsi_cmd[6] = 1; /* 1 sector */ + scsi_cmd[8] = lba_low; + scsi_cmd[10] = lba_mid; + scsi_cmd[12] = lba_high; + scsi_cmd[14] = ata_command; + + return scsi_execute_req(st->sdev, scsi_cmd, data_dir, + st->smartdata, ATA_SECT_SIZE, NULL, HZ, 5, + NULL); +} + +static int drivetemp_ata_command(struct drivetemp_data *st, u8 feature, + u8 select) +{ + return drivetemp_scsi_command(st, ATA_CMD_SMART, feature, select, + ATA_SMART_LBAM_PASS, ATA_SMART_LBAH_PASS); +} + +static int drivetemp_get_smarttemp(struct drivetemp_data *st, u32 attr, + long *temp) +{ + u8 *buf = st->smartdata; + bool have_temp = false; + u8 temp_raw; + u8 csum; + int err; + int i; + + err = drivetemp_ata_command(st, ATA_SMART_READ_VALUES, 0); + if (err) + return err; + + /* Checksum the read value table */ + csum = 0; + for (i = 0; i < ATA_SECT_SIZE; i++) + csum += buf[i]; + if (csum) { + dev_dbg(&st->sdev->sdev_gendev, + "checksum error reading SMART values\n"); + return -EIO; + } + + for (i = 0; i < ATA_MAX_SMART_ATTRS; i++) { + u8 *attr = buf + i * 12; + int id = attr[2]; + + if (!id) + continue; + + if (id == SMART_TEMP_PROP_190) { + temp_raw = attr[7]; + have_temp = true; + } + if (id == SMART_TEMP_PROP_194) { + temp_raw = attr[7]; + have_temp = true; + break; + } + } + + if (have_temp) { + *temp = temp_raw * 1000; + return 0; + } + + return -ENXIO; +} + +static int drivetemp_get_scttemp(struct drivetemp_data *st, u32 attr, long *val) +{ + u8 *buf = st->smartdata; + int err; + + err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR); + if (err) + return err; + switch (attr) { + case hwmon_temp_input: + *val = temp_from_sct(buf[SCT_STATUS_TEMP]); + break; + case hwmon_temp_lowest: + *val = temp_from_sct(buf[SCT_STATUS_TEMP_LOWEST]); + break; + case hwmon_temp_highest: + *val = temp_from_sct(buf[SCT_STATUS_TEMP_HIGHEST]); + break; + default: + err = -EINVAL; + break; + } + return err; +} + +static int drivetemp_identify_sata(struct drivetemp_data *st) +{ + struct scsi_device *sdev = st->sdev; + u8 *buf = st->smartdata; + struct scsi_vpd *vpd; + bool is_ata, is_sata; + bool have_sct_data_table; + bool have_sct_temp; + bool have_smart; + bool have_sct; + u16 *ata_id; + u16 version; + long temp; + int err; + + /* SCSI-ATA Translation present? */ + rcu_read_lock(); + vpd = rcu_dereference(sdev->vpd_pg89); + + /* + * Verify that ATA IDENTIFY DEVICE data is included in ATA Information + * VPD and that the drive implements the SATA protocol. 
+ */ + if (!vpd || vpd->len < 572 || vpd->data[56] != ATA_CMD_ID_ATA || + vpd->data[36] != 0x34) { + rcu_read_unlock(); + return -ENODEV; + } + ata_id = (u16 *)&vpd->data[60]; + is_ata = ata_id_is_ata(ata_id); + is_sata = ata_id_is_sata(ata_id); + have_sct = ata_id_sct_supported(ata_id); + have_sct_data_table = ata_id_sct_data_tables(ata_id); + have_smart = ata_id_smart_supported(ata_id) && + ata_id_smart_enabled(ata_id); + + rcu_read_unlock(); + + /* bail out if this is not a SATA device */ + if (!is_ata || !is_sata) + return -ENODEV; + if (!have_sct) + goto skip_sct; + + err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_STATUS_REQ_ADDR); + if (err) + goto skip_sct; + + version = (buf[SCT_STATUS_VERSION_HIGH] << 8) | + buf[SCT_STATUS_VERSION_LOW]; + if (version != 2 && version != 3) + goto skip_sct; + + have_sct_temp = temp_is_valid(buf[SCT_STATUS_TEMP]); + if (!have_sct_temp) + goto skip_sct; + + st->have_temp_lowest = temp_is_valid(buf[SCT_STATUS_TEMP_LOWEST]); + st->have_temp_highest = temp_is_valid(buf[SCT_STATUS_TEMP_HIGHEST]); + + if (!have_sct_data_table) + goto skip_sct; + + /* Request and read temperature history table */ + memset(buf, '\0', sizeof(st->smartdata)); + buf[0] = 5; /* data table command */ + buf[2] = 1; /* read table */ + buf[4] = 2; /* temperature history table */ + + err = drivetemp_ata_command(st, SMART_WRITE_LOG, SCT_STATUS_REQ_ADDR); + if (err) + goto skip_sct_data; + + err = drivetemp_ata_command(st, SMART_READ_LOG, SCT_READ_LOG_ADDR); + if (err) + goto skip_sct_data; + + /* + * Temperature limits per AT Attachment 8 - + * ATA/ATAPI Command Set (ATA8-ACS) + */ + st->have_temp_max = temp_is_valid(buf[6]); + st->have_temp_crit = temp_is_valid(buf[7]); + st->have_temp_min = temp_is_valid(buf[8]); + st->have_temp_lcrit = temp_is_valid(buf[9]); + + st->temp_max = temp_from_sct(buf[6]); + st->temp_crit = temp_from_sct(buf[7]); + st->temp_min = temp_from_sct(buf[8]); + st->temp_lcrit = temp_from_sct(buf[9]); + +skip_sct_data: + if (have_sct_temp) { + st->get_temp = drivetemp_get_scttemp; + return 0; + } +skip_sct: + if (!have_smart) + return -ENODEV; + st->get_temp = drivetemp_get_smarttemp; + return drivetemp_get_smarttemp(st, hwmon_temp_input, &temp); +} + +static int drivetemp_identify(struct drivetemp_data *st) +{ + struct scsi_device *sdev = st->sdev; + + /* Bail out immediately if there is no inquiry data */ + if (!sdev->inquiry || sdev->inquiry_len < 16) + return -ENODEV; + + /* Disk device? 
*/ + if (sdev->type != TYPE_DISK && sdev->type != TYPE_ZBC) + return -ENODEV; + + return drivetemp_identify_sata(st); +} + +static int drivetemp_read(struct device *dev, enum hwmon_sensor_types type, + u32 attr, int channel, long *val) +{ + struct drivetemp_data *st = dev_get_drvdata(dev); + int err = 0; + + if (type != hwmon_temp) + return -EINVAL; + + switch (attr) { + case hwmon_temp_input: + case hwmon_temp_lowest: + case hwmon_temp_highest: + mutex_lock(&st->lock); + err = st->get_temp(st, attr, val); + mutex_unlock(&st->lock); + break; + case hwmon_temp_lcrit: + *val = st->temp_lcrit; + break; + case hwmon_temp_min: + *val = st->temp_min; + break; + case hwmon_temp_max: + *val = st->temp_max; + break; + case hwmon_temp_crit: + *val = st->temp_crit; + break; + default: + err = -EINVAL; + break; + } + return err; +} + +static umode_t drivetemp_is_visible(const void *data, + enum hwmon_sensor_types type, + u32 attr, int channel) +{ + const struct drivetemp_data *st = data; + + switch (type) { + case hwmon_temp: + switch (attr) { + case hwmon_temp_input: + return 0444; + case hwmon_temp_lowest: + if (st->have_temp_lowest) + return 0444; + break; + case hwmon_temp_highest: + if (st->have_temp_highest) + return 0444; + break; + case hwmon_temp_min: + if (st->have_temp_min) + return 0444; + break; + case hwmon_temp_max: + if (st->have_temp_max) + return 0444; + break; + case hwmon_temp_lcrit: + if (st->have_temp_lcrit) + return 0444; + break; + case hwmon_temp_crit: + if (st->have_temp_crit) + return 0444; + break; + default: + break; + } + break; + default: + break; + } + return 0; +} + +static const struct hwmon_channel_info *drivetemp_info[] = { + HWMON_CHANNEL_INFO(chip, + HWMON_C_REGISTER_TZ), + HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | + HWMON_T_LOWEST | HWMON_T_HIGHEST | + HWMON_T_MIN | HWMON_T_MAX | + HWMON_T_LCRIT | HWMON_T_CRIT), + NULL +}; + +static const struct hwmon_ops drivetemp_ops = { + .is_visible = drivetemp_is_visible, + .read = drivetemp_read, +}; + +static const struct hwmon_chip_info drivetemp_chip_info = { + .ops = &drivetemp_ops, + .info = drivetemp_info, +}; + +/* + * The device argument points to sdev->sdev_dev. Its parent is + * sdev->sdev_gendev, which we can use to get the scsi_device pointer. 
+ */ +static int drivetemp_add(struct device *dev, struct class_interface *intf) +{ + struct scsi_device *sdev = to_scsi_device(dev->parent); + struct drivetemp_data *st; + int err; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + st->sdev = sdev; + st->dev = dev; + mutex_init(&st->lock); + + if (drivetemp_identify(st)) { + err = -ENODEV; + goto abort; + } + + st->hwdev = hwmon_device_register_with_info(dev->parent, "drivetemp", + st, &drivetemp_chip_info, + NULL); + if (IS_ERR(st->hwdev)) { + err = PTR_ERR(st->hwdev); + goto abort; + } + + list_add(&st->list, &drivetemp_devlist); + return 0; + +abort: + kfree(st); + return err; +} + +static void drivetemp_remove(struct device *dev, struct class_interface *intf) +{ + struct drivetemp_data *st, *tmp; + + list_for_each_entry_safe(st, tmp, &drivetemp_devlist, list) { + if (st->dev == dev) { + list_del(&st->list); + hwmon_device_unregister(st->hwdev); + kfree(st); + break; + } + } +} + +static struct class_interface drivetemp_interface = { + .add_dev = drivetemp_add, + .remove_dev = drivetemp_remove, +}; + +static int __init drivetemp_init(void) +{ + return scsi_register_interface(&drivetemp_interface); +} + +static void __exit drivetemp_exit(void) +{ + scsi_unregister_interface(&drivetemp_interface); +} + +module_init(drivetemp_init); +module_exit(drivetemp_exit); + +MODULE_AUTHOR("Guenter Roeck "); +MODULE_DESCRIPTION("Hard drive temperature monitor"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index f01f4887fb2e..a91ed01abb68 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -82,8 +82,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882, #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */ -#define LTC_NOT_BUSY BIT(5) -#define LTC_NOT_PENDING BIT(4) +#define LTC_NOT_BUSY BIT(6) +#define LTC_NOT_PENDING BIT(5) /* * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 8e48c7458aa3..255f8f41c8ff 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -718,9 +718,6 @@ static int msc_win_set_lockout(struct msc_window *win, if (old != expect) { ret = -EINVAL; - dev_warn_ratelimited(msc_dev(win->msc), - "expected lockout state %d, got %d\n", - expect, old); goto unlock; } @@ -741,6 +738,10 @@ static int msc_win_set_lockout(struct msc_window *win, /* from intel_th_msc_window_unlock(), don't warn if not locked */ if (expect == WIN_LOCKED && old == new) return 0; + + dev_warn_ratelimited(msc_dev(win->msc), + "expected lockout state %d, got %d\n", + expect, old); } return ret; @@ -760,7 +761,7 @@ static int msc_configure(struct msc *msc) lockdep_assert_held(&msc->buf_mutex); if (msc->mode > MSC_MODE_MULTI) - return -ENOTSUPP; + return -EINVAL; if (msc->mode == MSC_MODE_MULTI) { if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE)) @@ -1294,7 +1295,7 @@ static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages, } else if (msc->mode == MSC_MODE_MULTI) { ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins); } else { - ret = -ENOTSUPP; + ret = -EINVAL; } if (!ret) { @@ -1530,7 +1531,7 @@ static ssize_t intel_th_msc_read(struct file *file, char __user *buf, if (ret >= 0) *ppos = iter->offset; } else { - ret = -ENOTSUPP; + ret = -EINVAL; } put_count: diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 
e9d90b53bbc4..86aa6a46bcba 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -234,6 +234,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Elkhart Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Elkhart Lake */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26), diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c index b178a5495b67..360b5c03df95 100644 --- a/drivers/hwtracing/stm/p_sys-t.c +++ b/drivers/hwtracing/stm/p_sys-t.c @@ -238,7 +238,7 @@ static struct configfs_attribute *sys_t_policy_attrs[] = { static inline bool sys_t_need_ts(struct sys_t_output *op) { if (op->node.ts_interval && - time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) { + time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) { op->ts_jiffies = jiffies; return true; @@ -250,8 +250,8 @@ static inline bool sys_t_need_ts(struct sys_t_output *op) static bool sys_t_need_clock_sync(struct sys_t_output *op) { if (op->node.clocksync_interval && - time_after(op->clocksync_jiffies + op->node.clocksync_interval, - jiffies)) { + time_after(jiffies, + op->clocksync_jiffies + op->node.clocksync_interval)) { op->clocksync_jiffies = jiffies; return true; diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index 5255d3755411..1de23b4f3809 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -171,7 +171,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index 958161c71985..790ea3fda693 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -273,6 +273,7 @@ static int ec_i2c_probe(struct platform_device *pdev) bus->adap.dev.parent = &pdev->dev; bus->adap.dev.of_node = pdev->dev.of_node; bus->adap.retries = I2C_MAX_RETRIES; + ACPI_COMPANION_SET(&bus->adap.dev, ACPI_COMPANION(&pdev->dev)); err = i2c_add_adapter(&bus->adap); if (err) @@ -298,7 +299,7 @@ static const struct of_device_id cros_ec_i2c_of_match[] = { MODULE_DEVICE_TABLE(of, cros_ec_i2c_of_match); static const struct acpi_device_id cros_ec_i2c_tunnel_acpi_id[] = { - { "GOOG001A", 0 }, + { "GOOG0012", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, cros_ec_i2c_tunnel_acpi_id); diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 050adda7c1bd..05b35ac33ce3 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -313,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev) pm_runtime_get_noresume(&pdev->dev); i2c_del_adapter(&dev->adapter); + devm_free_irq(&pdev->dev, dev->irq, dev); pci_free_irq_vectors(pdev); } diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index 3a9e840a3546..a4a6825c8758 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -348,7 +348,7 @@ static struct gpio_desc *i2c_gpio_get_desc(struct device *dev, if (ret == -ENOENT) retdesc = 
ERR_PTR(-EPROBE_DEFER); - if (ret != -EPROBE_DEFER) + if (PTR_ERR(retdesc) != -EPROBE_DEFER) dev_err(dev, "error trying to get descriptor: %d\n", ret); return retdesc; diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 25dcd73acd63..8f0e1f802f2d 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -73,25 +73,6 @@ #define JZ4780_I2C_STA_TFNF BIT(1) #define JZ4780_I2C_STA_ACT BIT(0) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - "ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -529,21 +510,8 @@ static irqreturn_t jz4780_i2c_irq(int irqno, void *dev_id) static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 62a1c92ab803..ce70b5288472 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -394,9 +394,17 @@ EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle); static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; + struct i2c_client *client; dev = bus_find_device_by_acpi_dev(&i2c_bus_type, adev); - return dev ? 
i2c_verify_client(dev) : NULL; + if (!dev) + return NULL; + + client = i2c_verify_client(dev); + if (!client) + put_device(dev); + + return client; } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index a1898e11b04e..943bf944bf72 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -66,6 +66,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) struct ide_timing t; u8 arttim = 0; + if (drive->dn >= ARRAY_SIZE(drwtim_regs)) + return; + ide_timing_compute(drive, mode, &t, T, 0); /* diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index ac6fc3fffa0d..458e72e034b0 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -115,6 +115,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) struct pci_dev *dev = to_pci_dev(hwif->dev); const u8 pio = drive->pio_mode - XFER_PIO_0; + if (drive->dn >= ARRAY_SIZE(drive_pci)) + return; + pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); if (svwks_csb_check(dev)) { @@ -141,6 +144,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; + if (drive->dn >= ARRAY_SIZE(drive_pci2)) + return; + pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); pci_read_config_byte(dev, 0x54, &ultra_enable); diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c index 67b8817995c0..60daf04ce188 100644 --- a/drivers/iio/accel/adxl372.c +++ b/drivers/iio/accel/adxl372.c @@ -237,6 +237,7 @@ static const struct adxl372_axis_lookup adxl372_axis_lookup_table[] = { .realbits = 12, \ .storagebits = 16, \ .shift = 4, \ + .endianness = IIO_BE, \ }, \ } diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c index 50fa0fc32baa..0a0bffe04217 100644 --- a/drivers/iio/accel/st_accel_i2c.c +++ b/drivers/iio/accel/st_accel_i2c.c @@ -114,7 +114,7 @@ MODULE_DEVICE_TABLE(of, st_accel_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id st_accel_acpi_match[] = { - {"SMO8840", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME}, + {"SMO8840", (kernel_ulong_t)LIS2DH12_ACCEL_DEV_NAME}, {"SMO8A90", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME}, { }, }; diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index e1850f3d5cf3..2a6950aa6ae9 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -723,6 +723,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit); + u32 cor; if (!chan) continue; @@ -731,6 +732,20 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) chan->type == IIO_PRESSURE) continue; + if (state) { + cor = at91_adc_readl(st, AT91_SAMA5D2_COR); + + if (chan->differential) + cor |= (BIT(chan->channel) | + BIT(chan->channel2)) << + AT91_SAMA5D2_COR_DIFF_OFFSET; + else + cor &= ~(BIT(chan->channel) << + AT91_SAMA5D2_COR_DIFF_OFFSET); + + at91_adc_writel(st, AT91_SAMA5D2_COR, cor); + } + if (state) { at91_adc_writel(st, AT91_SAMA5D2_CHER, BIT(chan->channel)); diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index e493242c266e..3ae0366a7b58 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -842,31 +842,6 @@ static inline void stm32_dfsdm_process_data(struct stm32_dfsdm_adc 
*adc, } } -static irqreturn_t stm32_dfsdm_adc_trigger_handler(int irq, void *p) -{ - struct iio_poll_func *pf = p; - struct iio_dev *indio_dev = pf->indio_dev; - struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); - int available = stm32_dfsdm_adc_dma_residue(adc); - - while (available >= indio_dev->scan_bytes) { - s32 *buffer = (s32 *)&adc->rx_buf[adc->bufi]; - - stm32_dfsdm_process_data(adc, buffer); - - iio_push_to_buffers_with_timestamp(indio_dev, buffer, - pf->timestamp); - available -= indio_dev->scan_bytes; - adc->bufi += indio_dev->scan_bytes; - if (adc->bufi >= adc->buf_sz) - adc->bufi = 0; - } - - iio_trigger_notify_done(indio_dev->trig); - - return IRQ_HANDLED; -} - static void stm32_dfsdm_dma_buffer_done(void *data) { struct iio_dev *indio_dev = data; @@ -874,11 +849,6 @@ static void stm32_dfsdm_dma_buffer_done(void *data) int available = stm32_dfsdm_adc_dma_residue(adc); size_t old_pos; - if (indio_dev->currentmode & INDIO_BUFFER_TRIGGERED) { - iio_trigger_poll_chained(indio_dev->trig); - return; - } - /* * FIXME: In Kernel interface does not support cyclic DMA buffer,and * offers only an interface to push data samples per samples. @@ -906,7 +876,15 @@ static void stm32_dfsdm_dma_buffer_done(void *data) adc->bufi = 0; old_pos = 0; } - /* regular iio buffer without trigger */ + /* + * In DMA mode the trigger services of IIO are not used + * (e.g. no call to iio_trigger_poll). + * Calling the irq handler associated with the hardware trigger is + * not relevant, as the conversions have already been done. Data + * transfers are performed directly in the DMA callback instead. + * This implementation avoids calling a trigger irq handler that + * may sleep from an atomic context (the DMA irq handler context). + */ if (adc->dev_data->type == DFSDM_IIO) iio_push_to_buffers(indio_dev, buffer); } @@ -1204,6 +1182,8 @@ static int stm32_dfsdm_single_conv(struct iio_dev *indio_dev, stm32_dfsdm_stop_conv(adc); + stm32_dfsdm_process_data(adc, res); + stop_dfsdm: stm32_dfsdm_stop_dfsdm(adc->dfsdm); @@ -1515,8 +1495,7 @@ static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev) } ret = iio_triggered_buffer_setup(indio_dev, - &iio_pollfunc_store_time, - &stm32_dfsdm_adc_trigger_handler, + &iio_pollfunc_store_time, NULL, &stm32_dfsdm_buffer_setup_ops); if (ret) { stm32_dfsdm_dma_release(indio_dev); diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index 0b91de4df8f4..a7e65a59bf42 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -91,6 +91,8 @@ config SPS30 tristate "SPS30 particulate matter sensor" depends on I2C select CRC8 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here to build support for the Sensirion SPS30 particulate matter sensor.
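The stm32-dfsdm comment above pins down the pattern: in DMA mode, completed scans are pushed straight from the DMA completion callback, with no detour through a trigger handler that might sleep. Reduced to its core it looks like the sketch below (struct sketch_adc and its fields are illustrative stand-ins, not taken from this patch):

struct sketch_adc {
	u8 *rx_buf;		/* cyclic DMA buffer */
	unsigned int bufi;	/* current read position */
	unsigned int buf_sz;	/* buffer size in bytes */
};

static void sketch_dma_complete(void *data)
{
	struct iio_dev *indio_dev = data;
	struct sketch_adc *adc = iio_priv(indio_dev);

	/*
	 * Atomic (DMA irq) context: do not call iio_trigger_poll_chained(),
	 * which may end up in a handler that sleeps. The conversion is
	 * already done, so hand the scan straight to the IIO buffer.
	 */
	iio_push_to_buffers(indio_dev, &adc->rx_buf[adc->bufi]);

	adc->bufi += indio_dev->scan_bytes;
	if (adc->bufi >= adc->buf_sz)
		adc->bufi = 0;
}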
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index 57be68b291fa..26c50b24bc08 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -138,7 +138,6 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { [2] = LSM330DLC_GYRO_DEV_NAME, [3] = L3G4IS_GYRO_DEV_NAME, [4] = LSM330_GYRO_DEV_NAME, - [5] = LSM9DS0_GYRO_DEV_NAME, }, .ch = (struct iio_chan_spec *)st_gyro_16bit_channels, .odr = { @@ -208,6 +207,80 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { .multi_read_bit = true, .bootime = 2, }, + { + .wai = 0xd4, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, + .sensors_supported = { + [0] = LSM9DS0_GYRO_DEV_NAME, + }, + .ch = (struct iio_chan_spec *)st_gyro_16bit_channels, + .odr = { + .addr = 0x20, + .mask = GENMASK(7, 6), + .odr_avl = { + { .hz = 95, .value = 0x00, }, + { .hz = 190, .value = 0x01, }, + { .hz = 380, .value = 0x02, }, + { .hz = 760, .value = 0x03, }, + }, + }, + .pw = { + .addr = 0x20, + .mask = BIT(3), + .value_on = ST_SENSORS_DEFAULT_POWER_ON_VALUE, + .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE, + }, + .enable_axis = { + .addr = ST_SENSORS_DEFAULT_AXIS_ADDR, + .mask = ST_SENSORS_DEFAULT_AXIS_MASK, + }, + .fs = { + .addr = 0x23, + .mask = GENMASK(5, 4), + .fs_avl = { + [0] = { + .num = ST_GYRO_FS_AVL_245DPS, + .value = 0x00, + .gain = IIO_DEGREE_TO_RAD(8750), + }, + [1] = { + .num = ST_GYRO_FS_AVL_500DPS, + .value = 0x01, + .gain = IIO_DEGREE_TO_RAD(17500), + }, + [2] = { + .num = ST_GYRO_FS_AVL_2000DPS, + .value = 0x02, + .gain = IIO_DEGREE_TO_RAD(70000), + }, + }, + }, + .bdu = { + .addr = 0x23, + .mask = BIT(7), + }, + .drdy_irq = { + .int2 = { + .addr = 0x22, + .mask = BIT(3), + }, + /* + * The sensor has IHL (active low) and open + * drain settings, but only for INT1 and not + * for the DRDY line on INT2. + */ + .stat_drdy = { + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, + .mask = GENMASK(2, 0), + }, + }, + .sim = { + .addr = 0x23, + .value = BIT(0), + }, + .multi_read_bit = true, + .bootime = 2, + }, { .wai = 0xd7, .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index b921dd9e108f..e45123d8d281 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1506,8 +1506,11 @@ static int st_lsm6dsx_read_oneshot(struct st_lsm6dsx_sensor *sensor, if (err < 0) return err; - if (!hw->enable_event) - st_lsm6dsx_sensor_set_enable(sensor, false); + if (!hw->enable_event) { + err = st_lsm6dsx_sensor_set_enable(sensor, false); + if (err < 0) + return err; + } *val = (s16)le16_to_cpu(data); diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index b0e241aaefb4..e5b00a6611ac 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -167,16 +167,17 @@ static int vcnl4200_init(struct vcnl4000_data *data) data->vcnl4200_ps.reg = VCNL4200_PS_DATA; switch (id) { case VCNL4200_PROD_ID: - /* Integration time is 50ms, but the experiments */ - /* show 54ms in total. */ - data->vcnl4200_al.sampling_rate = ktime_set(0, 54000 * 1000); - data->vcnl4200_ps.sampling_rate = ktime_set(0, 4200 * 1000); + /* Default wait time is 50ms, add 20% tolerance. */ + data->vcnl4200_al.sampling_rate = ktime_set(0, 60000 * 1000); + /* Default wait time is 4.8ms, add 20% tolerance. 
*/ + data->vcnl4200_ps.sampling_rate = ktime_set(0, 5760 * 1000); data->al_scale = 24000; break; case VCNL4040_PROD_ID: - /* Integration time is 80ms, add 10ms. */ - data->vcnl4200_al.sampling_rate = ktime_set(0, 100000 * 1000); - data->vcnl4200_ps.sampling_rate = ktime_set(0, 100000 * 1000); + /* Default wait time is 80ms, add 20% tolerance. */ + data->vcnl4200_al.sampling_rate = ktime_set(0, 96000 * 1000); + /* Default wait time is 5ms, add 20% tolerance. */ + data->vcnl4200_ps.sampling_rate = ktime_set(0, 6000 * 1000); data->al_scale = 120000; break; } diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index fc7e910f8e8b..d32996702110 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -564,7 +564,7 @@ static int ak8974_read_raw(struct iio_dev *indio_dev, * We read all axes and discard all but one, for optimized * reading, use the triggered buffer. */ - *val = le16_to_cpu(hw_values[chan->address]); + *val = (s16)le16_to_cpu(hw_values[chan->address]); ret = IIO_VAL_INT; } diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index a5dfe65cd9b9..f98510c714b5 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -161,7 +161,8 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv, return 0; } -static void stm32_timer_stop(struct stm32_timer_trigger *priv) +static void stm32_timer_stop(struct stm32_timer_trigger *priv, + struct iio_trigger *trig) { u32 ccer, cr1; @@ -179,6 +180,12 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv) regmap_write(priv->regmap, TIM_PSC, 0); regmap_write(priv->regmap, TIM_ARR, 0); + /* Force disable master mode */ + if (stm32_timer_is_trgo2_name(trig->name)) + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0); + else + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0); + /* Make sure that registers are updated */ regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG); } @@ -197,7 +204,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev, return ret; if (freq == 0) { - stm32_timer_stop(priv); + stm32_timer_stop(priv, trig); } else { ret = stm32_timer_start(priv, trig, freq); if (ret) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 606fa6d86685..1753a9801b70 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -139,7 +139,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d535995711c3..e55f345799e4 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -51,9 +51,8 @@ struct ib_pkey_cache { struct ib_update_work { struct work_struct work; - struct ib_device *device; - u8 port_num; - bool enforce_security; + struct ib_event event; + bool enforce_security; }; union ib_gid zgid; @@ -130,7 +129,7 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) event.element.port_num = port; event.event = IB_EVENT_GID_CHANGE; - ib_dispatch_event(&event); + ib_dispatch_event_clients(&event); } static const char * const gid_type_str[] = { @@ -1381,9 +1380,8 @@ static int config_non_roce_gid_cache(struct ib_device *device, return ret; } -static void ib_cache_update(struct ib_device *device, - u8 
port, - bool enforce_security) +static int +ib_cache_update(struct ib_device *device, u8 port, bool enforce_security) { struct ib_port_attr *tprops = NULL; struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; @@ -1391,11 +1389,11 @@ static void ib_cache_update(struct ib_device *device, int ret; if (!rdma_is_port_valid(device, port)) - return; + return -EINVAL; tprops = kmalloc(sizeof *tprops, GFP_KERNEL); if (!tprops) - return; + return -ENOMEM; ret = ib_query_port(device, port, tprops); if (ret) { @@ -1413,8 +1411,10 @@ static void ib_cache_update(struct ib_device *device, pkey_cache = kmalloc(struct_size(pkey_cache, table, tprops->pkey_tbl_len), GFP_KERNEL); - if (!pkey_cache) + if (!pkey_cache) { + ret = -ENOMEM; goto err; + } pkey_cache->table_len = tprops->pkey_tbl_len; @@ -1446,50 +1446,84 @@ static void ib_cache_update(struct ib_device *device, kfree(old_pkey_cache); kfree(tprops); - return; + return 0; err: kfree(pkey_cache); kfree(tprops); + return ret; +} + +static void ib_cache_event_task(struct work_struct *_work) +{ + struct ib_update_work *work = + container_of(_work, struct ib_update_work, work); + int ret; + + /* Before distributing the cache update event, first sync + * the cache. + */ + ret = ib_cache_update(work->event.device, work->event.element.port_num, + work->enforce_security); + + /* GID event is notified already for individual GID entries by + * dispatch_gid_change_event(). Hence, notifiy for rest of the + * events. + */ + if (!ret && work->event.event != IB_EVENT_GID_CHANGE) + ib_dispatch_event_clients(&work->event); + + kfree(work); } -static void ib_cache_task(struct work_struct *_work) +static void ib_generic_event_task(struct work_struct *_work) { struct ib_update_work *work = container_of(_work, struct ib_update_work, work); - ib_cache_update(work->device, - work->port_num, - work->enforce_security); + ib_dispatch_event_clients(&work->event); kfree(work); } -static void ib_cache_event(struct ib_event_handler *handler, - struct ib_event *event) +static bool is_cache_update_event(const struct ib_event *event) +{ + return (event->event == IB_EVENT_PORT_ERR || + event->event == IB_EVENT_PORT_ACTIVE || + event->event == IB_EVENT_LID_CHANGE || + event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_CLIENT_REREGISTER || + event->event == IB_EVENT_GID_CHANGE); +} + +/** + * ib_dispatch_event - Dispatch an asynchronous event + * @event:Event to dispatch + * + * Low-level drivers must call ib_dispatch_event() to dispatch the + * event to all registered event handlers when an asynchronous event + * occurs. 
+ */ +void ib_dispatch_event(const struct ib_event *event) { struct ib_update_work *work; - if (event->event == IB_EVENT_PORT_ERR || - event->event == IB_EVENT_PORT_ACTIVE || - event->event == IB_EVENT_LID_CHANGE || - event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_CLIENT_REREGISTER || - event->event == IB_EVENT_GID_CHANGE) { - work = kmalloc(sizeof *work, GFP_ATOMIC); - if (work) { - INIT_WORK(&work->work, ib_cache_task); - work->device = event->device; - work->port_num = event->element.port_num; - if (event->event == IB_EVENT_PKEY_CHANGE || - event->event == IB_EVENT_GID_CHANGE) - work->enforce_security = true; - else - work->enforce_security = false; - - queue_work(ib_wq, &work->work); - } - } + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return; + + if (is_cache_update_event(event)) + INIT_WORK(&work->work, ib_cache_event_task); + else + INIT_WORK(&work->work, ib_generic_event_task); + + work->event = *event; + if (event->event == IB_EVENT_PKEY_CHANGE || + event->event == IB_EVENT_GID_CHANGE) + work->enforce_security = true; + + queue_work(ib_wq, &work->work); } +EXPORT_SYMBOL(ib_dispatch_event); int ib_cache_setup_one(struct ib_device *device) { @@ -1505,9 +1539,6 @@ int ib_cache_setup_one(struct ib_device *device) rdma_for_each_port (device, p) ib_cache_update(device, p, true); - INIT_IB_EVENT_HANDLER(&device->cache.event_handler, - device, ib_cache_event); - ib_register_event_handler(&device->cache.event_handler); return 0; } @@ -1529,14 +1560,12 @@ void ib_cache_release_one(struct ib_device *device) void ib_cache_cleanup_one(struct ib_device *device) { - /* The cleanup function unregisters the event handler, - * waits for all in-progress workqueue elements and cleans - * up the GID cache. This function should be called after - * the device was removed from the devices list and all - * clients were removed, so the cache exists but is + /* The cleanup function waits for all in-progress workqueue + * elements and cleans up the GID cache. This function should be + * called after the device was removed from the devices list and + * all clients were removed, so the cache exists but is * non-functional and shouldn't be updated anymore. 
*/ - ib_unregister_event_handler(&device->cache.event_handler); flush_workqueue(ib_wq); gid_table_cleanup_one(device); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 455b3659d84b..4decc1d4cc99 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1202,6 +1202,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } refcount_inc(&cm_id_priv->refcount); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 43a6f07e0afe..0b530646f1e5 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3118,6 +3118,7 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3144,6 +3145,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + atomic_inc(&id_priv->refcount); cma_init_resolve_addr_work(work, id_priv); queue_work(cma_wq, &work->work); return 0; @@ -3180,19 +3182,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, int ret; id_priv = container_of(id, struct rdma_id_private, id); + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (id_priv->state == RDMA_CM_IDLE) { ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) + if (ret) { + memset(cma_dst_addr(id_priv), 0, + rdma_addr_size(dst_addr)); return ret; + } } - if (cma_family(id_priv) != dst_addr->sa_family) + if (cma_family(id_priv) != dst_addr->sa_family) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); return -EINVAL; + } - memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); } else { diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 3645e092e1c7..9dc7383eef49 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -149,6 +149,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); void ib_cache_cleanup_one(struct ib_device *device); void ib_cache_release_one(struct ib_device *device); +void ib_dispatch_event_clients(struct ib_event *event); #ifdef CONFIG_CGROUP_RDMA void ib_device_register_rdmacg(struct ib_device *device); @@ -337,6 +338,21 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, qp->pd = pd; qp->uobject = uobj; qp->real_qp = qp; + + qp->qp_type = attr->qp_type; + qp->qp_context = attr->qp_context; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->send_cq = attr->send_cq; + qp->recv_cq = attr->recv_cq; + qp->srq = attr->srq; + qp->rwq_ind_tbl = attr->rwq_ind_tbl; + qp->event_handler = attr->event_handler; + + atomic_set(&qp->usecnt, 0); + spin_lock_init(&qp->mr_lock); + INIT_LIST_HEAD(&qp->rdma_mrs); + 
INIT_LIST_HEAD(&qp->sig_mrs); + /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 84dd74fe13b8..c38b2b0b078a 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -588,6 +588,7 @@ struct ib_device *_ib_alloc_device(size_t size) INIT_LIST_HEAD(&device->event_handler_list); spin_lock_init(&device->event_handler_lock); + init_rwsem(&device->event_handler_rwsem); mutex_init(&device->unregistration_lock); /* * client_data needs to be alloc because we don't want our mark to be @@ -1931,17 +1932,15 @@ EXPORT_SYMBOL(ib_set_client_data); * * ib_register_event_handler() registers an event handler that will be * called back when asynchronous IB events occur (as defined in - * chapter 11 of the InfiniBand Architecture Specification). This - * callback may occur in interrupt context. + * chapter 11 of the InfiniBand Architecture Specification). This + * callback occurs in workqueue context. */ void ib_register_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_add_tail(&event_handler->list, &event_handler->device->event_handler_list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_register_event_handler); @@ -1954,35 +1953,23 @@ EXPORT_SYMBOL(ib_register_event_handler); */ void ib_unregister_event_handler(struct ib_event_handler *event_handler) { - unsigned long flags; - - spin_lock_irqsave(&event_handler->device->event_handler_lock, flags); + down_write(&event_handler->device->event_handler_rwsem); list_del(&event_handler->list); - spin_unlock_irqrestore(&event_handler->device->event_handler_lock, flags); + up_write(&event_handler->device->event_handler_rwsem); } EXPORT_SYMBOL(ib_unregister_event_handler); -/** - * ib_dispatch_event - Dispatch an asynchronous event - * @event:Event to dispatch - * - * Low-level drivers must call ib_dispatch_event() to dispatch the - * event to all registered event handlers when an asynchronous event - * occurs. 
- */ -void ib_dispatch_event(struct ib_event *event) +void ib_dispatch_event_clients(struct ib_event *event) { - unsigned long flags; struct ib_event_handler *handler; - spin_lock_irqsave(&event->device->event_handler_lock, flags); + down_read(&event->device->event_handler_rwsem); list_for_each_entry(handler, &event->device->event_handler_list, list) handler->handler(handler, event); - spin_unlock_irqrestore(&event->device->event_handler_lock, flags); + up_read(&event->device->event_handler_rwsem); } -EXPORT_SYMBOL(ib_dispatch_event); static int iw_query_port(struct ib_device *device, u8 port_num, diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c index b7cb59844ece..b51bd7087a88 100644 --- a/drivers/infiniband/core/ib_core_uverbs.c +++ b/drivers/infiniband/core/ib_core_uverbs.c @@ -232,7 +232,9 @@ void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry) if (!entry) return; + xa_lock(&entry->ucontext->mmap_xa); entry->driver_removed = true; + xa_unlock(&entry->ucontext->mmap_xa); kref_put(&entry->ref, rdma_user_mmap_entry_free); } EXPORT_SYMBOL(rdma_user_mmap_entry_remove); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index ade71823370f..da8adadf4755 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -159,8 +159,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index cbf6041a5d4a..ba76709897bb 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1756,6 +1756,8 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, if (ret) goto err_msg; } else { + if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) + goto err_msg; qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 4fad732f9b3c..06e5b6787443 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -273,6 +273,23 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return 1; } +static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + pci_p2pdma_unmap_sg(dev->dma_device, sg, sg_cnt, dir); + else + ib_dma_unmap_sg(dev, sg, sg_cnt, dir); +} + +static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg, + u32 sg_cnt, enum dma_data_direction dir) +{ + if (is_pci_p2pdma_page(sg_page(sg))) + return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); + return ib_dma_map_sg(dev, sg, sg_cnt, dir); +} + /** * rdma_rw_ctx_init - initialize a RDMA READ/WRITE context * @ctx: context to initialize @@ -295,11 +312,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_device *dev = qp->pd->device; int ret; - if (is_pci_p2pdma_page(sg_page(sg))) - ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir); - else - ret = ib_dma_map_sg(dev, sg, sg_cnt, dir); - + ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir); if (!ret) return -ENOMEM; 
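The value of introducing rdma_rw_map_sg()/rdma_rw_unmap_sg() as a pair is that every failure and teardown path now releases the mapping with the same primitive that created it; the old out_unmap_sg label called ib_dma_unmap_sg() even when pci_p2pdma_map_sg() had done the mapping. A minimal sketch of a caller relying on that symmetry; post_io() is a hypothetical placeholder, and the two helpers are the static ones from this hunk:

    static int do_rdma_io(struct ib_device *dev, struct scatterlist *sg,
                          u32 sg_cnt, enum dma_data_direction dir)
    {
            int ret;

            ret = rdma_rw_map_sg(dev, sg, sg_cnt, dir);
            if (!ret)                       /* zero mapped entries means failure */
                    return -ENOMEM;

            ret = post_io(dev, sg, ret);    /* hypothetical I/O submission */
            if (ret)
                    /* always undo with the same flavor that mapped it */
                    rdma_rw_unmap_sg(dev, sg, sg_cnt, dir);
            return ret;
    }
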
sg_cnt = ret; @@ -338,7 +351,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, return ret; out_unmap_sg: - ib_dma_unmap_sg(dev, sg, sg_cnt, dir); + rdma_rw_unmap_sg(dev, sg, sg_cnt, dir); return ret; } EXPORT_SYMBOL(rdma_rw_ctx_init); @@ -588,11 +601,7 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, break; } - if (is_pci_p2pdma_page(sg_page(sg))) - pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg, - sg_cnt, dir); - else - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + rdma_rw_unmap_sg(qp->pd->device, sg, sg_cnt, dir); } EXPORT_SYMBOL(rdma_rw_ctx_destroy); diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8917125ea16d..30d4c126a2db 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1068,7 +1068,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, } settimeout_out: - return skb->len; + return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) @@ -1139,7 +1139,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, } resp_out: - return skb->len; + return 0; } static void free_sm_ah(struct kref *kref) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index 6eb6d2717ca5..2d5608315dc8 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -339,22 +339,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; - - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) + new_pps->main.port_num = qp_pps->main.port_num; + + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) + new_pps->main.pkey_index = qp_pps->main.pkey_index; + + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 7a3b99597ead..40cadb889114 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -166,10 +166,13 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, * for any address. */ mask |= (sg_dma_address(sg) + pgoff) ^ va; - if (i && i != (umem->nmap - 1)) - /* restrict by length as well for interior SGEs */ - mask |= sg_dma_len(sg); va += sg_dma_len(sg) - pgoff; + /* Except for the last entry, the ending iova alignment sets + * the maximum possible page size as the low bits of the iova + * must be zero when starting the next chunk. 
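The rewritten loop in ib_umem_find_best_pgsz() folds the iova at each interior chunk boundary into mask: a page size larger than that boundary's alignment would leave nonzero low bits when the next chunk starts. The final selection then reduces to picking the largest supported page bit at or below the alignment of mask; roughly, and only as a sketch, pick_pg_bit() being an illustrative stand-in for rdma_find_pg_bit(), not the kernel helper itself:

    static unsigned int pick_pg_bit(unsigned long mask,
                                    unsigned long pgsz_bitmap)
    {
            /* the lowest set bit of mask bounds the usable page shift */
            unsigned int align_bit = mask ? __ffs(mask) : BITS_PER_LONG - 1;
            unsigned long usable = pgsz_bitmap & GENMASK(align_bit, 0);

            /* fall back to the smallest supported size if none fits */
            return usable ? __fls(usable) : __ffs(pgsz_bitmap);
    }
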
+ */ + if (i != (umem->nmap - 1)) + mask |= va; pgoff = 0; } best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e42d44e501fd..eb22cb4f26b4 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -187,14 +187,28 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root, unsigned long addr, odp_data->page_shift = PAGE_SHIFT; odp_data->notifier.ops = ops; + /* + * A mmget must be held when registering a notifier, the owming_mm only + * has a mm_grab at this point. + */ + if (!mmget_not_zero(umem->owning_mm)) { + ret = -EFAULT; + goto out_free; + } + odp_data->tgid = get_pid(root->tgid); ret = ib_init_umem_odp(odp_data, ops); - if (ret) { - put_pid(odp_data->tgid); - kfree(odp_data); - return ERR_PTR(ret); - } + if (ret) + goto out_tgid; + mmput(umem->owning_mm); return odp_data; + +out_tgid: + put_pid(odp_data->tgid); + mmput(umem->owning_mm); +out_free: + kfree(odp_data); + return ERR_PTR(ret); } EXPORT_SYMBOL(ib_umem_odp_alloc_child); @@ -242,21 +256,10 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, umem_odp->notifier.ops = ops; umem_odp->page_shift = PAGE_SHIFT; - if (access & IB_ACCESS_HUGETLB) { - struct vm_area_struct *vma; - struct hstate *h; - - down_read(&mm->mmap_sem); - vma = find_vma(mm, ib_umem_start(umem_odp)); - if (!vma || !is_vm_hugetlb_page(vma)) { - up_read(&mm->mmap_sem); - ret = -EINVAL; - goto err_free; - } - h = hstate_vma(vma); - umem_odp->page_shift = huge_page_shift(h); - up_read(&mm->mmap_sem); - } +#ifdef CONFIG_HUGETLB_PAGE + if (access & IB_ACCESS_HUGETLB) + umem_odp->page_shift = HPAGE_SHIFT; +#endif umem_odp->tgid = get_task_pid(current->group_leader, PIDTYPE_PID); ret = ib_init_umem_odp(umem_odp, ops); @@ -266,7 +269,6 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr, err_put_pid: put_pid(umem_odp->tgid); -err_free: kfree(umem_odp); return ERR_PTR(ret); } @@ -440,7 +442,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index d1407fa378e8..1235ffb2389b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1312,6 +1312,9 @@ static void ib_umad_kill_port(struct ib_umad_port *port) struct ib_umad_file *file; int id; + cdev_device_del(&port->sm_cdev, &port->sm_dev); + cdev_device_del(&port->cdev, &port->dev); + mutex_lock(&port->file_mutex); /* Mark ib_dev NULL and block ioctl or other file ops to progress @@ -1331,8 +1334,6 @@ static void ib_umad_kill_port(struct ib_umad_port *port) mutex_unlock(&port->file_mutex); - cdev_device_del(&port->sm_cdev, &port->sm_dev); - cdev_device_del(&port->cdev, &port->dev); ida_free(&umad_ida, port->dev_num); /* balances device_initialize() */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 06ed32c8662f..c3a67ad82ddd 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1433,17 +1433,7 @@ static int create_qp(struct uverbs_attr_bundle *attrs, if (ret) goto err_cb; - qp->pd = pd; - qp->send_cq = attr.send_cq; - qp->recv_cq = attr.recv_cq; - qp->srq = attr.srq; - qp->rwq_ind_tbl = ind_tbl; - 
qp->event_handler = attr.event_handler; - qp->qp_context = attr.qp_context; - qp->qp_type = attr.qp_type; - atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); - qp->port = 0; if (attr.send_cq) atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) @@ -2720,12 +2710,6 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs, return 0; } -static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec) -{ - /* Returns user space filter size, includes padding */ - return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; -} - static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size, u16 ib_real_filter_sz) { @@ -2869,11 +2853,16 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type, static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { - ssize_t kern_filter_sz; + size_t kern_filter_sz; void *kern_spec_mask; void *kern_spec_val; - kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + if (check_sub_overflow((size_t)kern_spec->hdr.size, + sizeof(struct ib_uverbs_flow_spec_hdr), + &kern_filter_sz)) + return -EINVAL; + + kern_filter_sz /= 2; kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 970d8e31dd65..8f5de4dcad97 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -220,7 +220,6 @@ void ib_uverbs_release_file(struct kref *ref) } static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, - struct ib_uverbs_file *uverbs_file, struct file *filp, char __user *buf, size_t count, loff_t *pos, size_t eventsz) @@ -238,19 +237,16 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue, if (wait_event_interruptible(ev_queue->poll_wait, (!list_empty(&ev_queue->event_list) || - /* The barriers built into wait_event_interruptible() - * and wake_up() guarentee this will see the null set - * without using RCU - */ - !uverbs_file->device->ib_dev))) + ev_queue->is_closed))) return -ERESTARTSYS; + spin_lock_irq(&ev_queue->lock); + /* If device was disassociated and no event exists set an error */ - if (list_empty(&ev_queue->event_list) && - !uverbs_file->device->ib_dev) + if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) { + spin_unlock_irq(&ev_queue->lock); return -EIO; - - spin_lock_irq(&ev_queue->lock); + } } event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list); @@ -285,8 +281,7 @@ static ssize_t ib_uverbs_async_event_read(struct file *filp, char __user *buf, { struct ib_uverbs_async_event_file *file = filp->private_data; - return ib_uverbs_event_read(&file->ev_queue, file->uverbs_file, filp, - buf, count, pos, + return ib_uverbs_event_read(&file->ev_queue, filp, buf, count, pos, sizeof(struct ib_uverbs_async_event_desc)); } @@ -296,9 +291,8 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf, struct ib_uverbs_completion_event_file *comp_ev_file = filp->private_data; - return ib_uverbs_event_read(&comp_ev_file->ev_queue, - comp_ev_file->uobj.ufile, filp, - buf, count, pos, + return ib_uverbs_event_read(&comp_ev_file->ev_queue, filp, buf, count, + pos, sizeof(struct ib_uverbs_comp_event_desc)); } @@ -321,7 +315,9 @@ static __poll_t ib_uverbs_event_poll(struct ib_uverbs_event_queue *ev_queue, static __poll_t ib_uverbs_async_event_poll(struct file *filp, struct poll_table_struct *wait) { - return 
ib_uverbs_event_poll(filp->private_data, filp, wait); + struct ib_uverbs_async_event_file *file = filp->private_data; + + return ib_uverbs_event_poll(&file->ev_queue, filp, wait); } static __poll_t ib_uverbs_comp_event_poll(struct file *filp, @@ -335,9 +331,9 @@ static __poll_t ib_uverbs_comp_event_poll(struct file *filp, static int ib_uverbs_async_event_fasync(int fd, struct file *filp, int on) { - struct ib_uverbs_event_queue *ev_queue = filp->private_data; + struct ib_uverbs_async_event_file *file = filp->private_data; - return fasync_helper(fd, filp, on, &ev_queue->async_queue); + return fasync_helper(fd, filp, on, &file->ev_queue.async_queue); } static int ib_uverbs_comp_event_fasync(int fd, struct file *filp, int on) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index dd765e176cdd..eb1c68311e52 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1182,16 +1182,6 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, if (ret) goto err; - qp->qp_type = qp_init_attr->qp_type; - qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; - - atomic_set(&qp->usecnt, 0); - qp->mrs_used = 0; - spin_lock_init(&qp->mr_lock); - INIT_LIST_HEAD(&qp->rdma_mrs); - INIT_LIST_HEAD(&qp->sig_mrs); - qp->port = 0; - if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { struct ib_qp *xrc_qp = create_xrc_qp_user(qp, qp_init_attr, udata); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ee1182f9b627..d69dece3b1d5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3036,6 +3036,10 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } + /* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3, + * when entering the TERM state the RNIC MUST initiate a CLOSE. 
+ */ + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); } else pr_warn("TERM received tid %u no ep/qp\n", tid); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index bbcac539777a..89ac2f9ae6dd 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1948,10 +1948,10 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; ep = qhp->ep; - c4iw_get_ep(&ep->com); - disconnect = 1; if (!internal) { + c4iw_get_ep(&ep->com); terminate = 1; + disconnect = 1; } else { terminate = qhp->attr.send_term; ret = rdma_fini(rhp, qhp, ep); diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index c142b23bb401..1aeea5d65c01 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -479,6 +479,8 @@ static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); } + free_cpumask_var(available_cpus); + free_cpumask_var(non_intr_cpus); return 0; fail: diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9b1fb84a3d45..10924f122072 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1685,6 +1685,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4106,6 +4114,7 @@ def_access_ibp_counter(rc_crwaits); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), [C_RX_LEN_ERR] = RXE32_DEV_CNTR_ELEM(RxLenErr, RCV_LENGTH_ERR_CNT, CNTR_SYNTH), +[C_RX_SHORT_ERR] = RXE32_DEV_CNTR_ELEM(RxShrErr, RCV_SHORT_ERR_CNT, CNTR_SYNTH), [C_RX_ICRC_ERR] = RXE32_DEV_CNTR_ELEM(RxICrcErr, RCV_ICRC_ERR_CNT, CNTR_SYNTH), [C_RX_EBP] = RXE32_DEV_CNTR_ELEM(RxEbpCnt, RCV_EBP_CNT, CNTR_SYNTH), [C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT, @@ -4249,6 +4258,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 4ca5ac8d7e9e..af0061936c66 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -859,6 +859,7 @@ static inline int idx_from_vl(int vl) enum { C_RCV_OVF = 0, C_RX_LEN_ERR, + C_RX_SHORT_ERR, C_RX_ICRC_ERR, C_RX_EBP, C_RX_TID_FULL, @@ -926,6 +927,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index ab3589d17aee..fb3ec9bff7a2 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -381,6 +381,7 @@ #define DC_LCB_STS_LINK_TRANSFER_ACTIVE 
(DC_LCB_CSRS + 0x000000000468) #define DC_LCB_STS_ROUND_TRIP_LTP_CNT (DC_LCB_CSRS + 0x0000000004B0) #define RCV_LENGTH_ERR_CNT 0 +#define RCV_SHORT_ERR_CNT 2 #define RCV_ICRC_ERR_CNT 6 #define RCV_EBP_CNT 9 #define RCV_BUF_OVFL_CNT 10 diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 01aa1f132f55..941b465244ab 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -734,6 +734,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7c5e3fb22413..b7bb55b57889 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); return reqs; } @@ -707,6 +718,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index fc10d65fc3e1..9edfd3e56f61 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1153,6 +1153,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info 
*/ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; @@ -1436,10 +1438,13 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index f05742ac0949..4da03f823474 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -87,9 +87,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, { int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); @@ -142,10 +139,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) { struct hfi1_ctxtdata *uctxt = fd->uctxt; + mutex_lock(&uctxt->exp_mutex); if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list)) unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd); if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list)) unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd); + mutex_unlock(&uctxt->exp_mutex); kfree(fd->invalid_tids); fd->invalid_tids = NULL; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index fd754a16475a..c2f0d9ba93de 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 089e201d7550..2f6323ad9c59 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -515,10 +515,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); 
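This hunk brackets the walk of mcast->qp_list in an RCU read-side critical section, and the pkey-check failure below is rerouted so that no early exit leaves the lock held. The generic shape of the pattern, as a sketch; still_valid() and consume() are hypothetical stand-ins:

    struct item {
            struct list_head list;
    };

    static void walk_rcu_list(struct list_head *head)
    {
            struct item *p;

            rcu_read_lock();
            list_for_each_entry_rcu(p, head, list) {
                    if (!still_valid(p))    /* hypothetical check */
                            goto unlock;    /* never bail out with the lock held */
                    consume(p);             /* hypothetical, must not sleep */
            }
    unlock:
            rcu_read_unlock();
    }
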
list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -527,6 +528,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5617434cbfb4..416341ada172 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -423,7 +423,7 @@ struct hns_roce_mr_table { struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; - int wqe_cnt; /* WQE num */ + u32 wqe_cnt; /* WQE num */ int max_gs; int offset; int wqe_shift; /* WQE size */ @@ -647,7 +647,6 @@ struct hns_roce_qp { u8 sdb_en; u32 doorbell_qpn; u32 sq_signal_bits; - u32 sq_next_wqe; struct hns_roce_wq sq; struct ib_umem *umem; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 2a2b2112f886..a31a21433f65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -74,8 +74,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; + u32 wqe_idx = 0; int nreq = 0; - u32 ind = 0; int ret = 0; u8 *smac; int loopback; @@ -88,7 +88,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, } spin_lock_irqsave(&qp->sq.lock, flags); - ind = qp->sq_next_wqe; + for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { ret = -ENOMEM; @@ -96,6 +96,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (unlikely(wr->num_sge > qp->sq.max_gs)) { dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, qp->sq.max_gs); @@ -104,9 +106,8 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = - wr->wr_id; + wqe = get_send_wqe(qp, wqe_idx); + qp->sq.wrid[wqe_idx] = wr->wr_id; /* Corresponding to the RC and RD type wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { @@ -210,7 +211,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, cpu_to_le32((wr->sg_list[1].addr) >> 32); ud_sq_wqe->l_key1 = cpu_to_le32(wr->sg_list[1].lkey); - ind++; } else if (ibqp->qp_type == IB_QPT_RC) { u32 tmp_len = 0; @@ -308,7 +308,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ctrl->flag |= cpu_to_le32(wr->num_sge << HNS_ROCE_WQE_SGE_NUM_BIT); } - ind++; } } @@ -336,7 +335,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, doorbell[1] = sq_db.u32_8; hns_roce_write64_k(doorbell, qp->sq.db_reg_l); - qp->sq_next_wqe = ind; } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -348,12 +346,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { - int ret = 0; - int nreq = 0; - int ind = 0; - int i = 0; - u32 reg_val; - unsigned long flags = 0; struct hns_roce_rq_wqe_ctrl *ctrl = NULL; struct hns_roce_wqe_data_seg *scat = NULL; struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); @@ -361,9 +353,14 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct 
device *dev = &hr_dev->pdev->dev; struct hns_roce_rq_db rq_db; __le32 doorbell[2] = {0}; + unsigned long flags = 0; + unsigned int wqe_idx; + int ret = 0; + int nreq = 0; + int i = 0; + u32 reg_val; spin_lock_irqsave(&hr_qp->rq.lock, flags); - ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&hr_qp->rq, nreq, @@ -373,6 +370,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); @@ -381,7 +380,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, goto out; } - ctrl = get_recv_wqe(hr_qp, ind); + ctrl = get_recv_wqe(hr_qp, wqe_idx); roce_set_field(ctrl->rwqe_byte_12, RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, @@ -393,9 +392,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, for (i = 0; i < wr->num_sge; i++) set_data_seg(scat + i, wr->sg_list + i); - hr_qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: @@ -2701,7 +2698,6 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; } kfree(context); @@ -3315,7 +3311,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; } out: kfree(context); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index cb8071a3e0d5..87186446dffb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -110,7 +110,7 @@ static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg, } static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind) + unsigned int *sge_ind, int valid_num_sge) { struct hns_roce_v2_wqe_data_seg *dseg; struct ib_sge *sg; @@ -123,7 +123,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; - extend_sge_num = wr->num_sge - num_in_wqe; + extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; shift = qp->hr_buf.page_shift; @@ -159,14 +159,16 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, + int valid_num_sge, const struct ib_send_wr **bad_wr) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); + int j = 0; int i; - if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { if (le32_to_cpu(rc_sq_wqe->msg_len) > hr_dev->caps.max_sq_inline) { *bad_wr = wr; @@ -190,7 +192,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, 
wr->sg_list + i); @@ -203,19 +205,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + for (i = 0; i < wr->num_sge && + j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; + j++; } } - set_extend_sge(qp, wr, sge_ind); + set_extend_sge(qp, wr, sge_ind, valid_num_sge); } roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge); + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } return 0; @@ -239,10 +243,11 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; struct ib_qp_attr attr; - unsigned int sge_ind; unsigned int owner_bit; + unsigned int sge_idx; + unsigned int wqe_idx; unsigned long flags; - unsigned int ind; + int valid_num_sge; void *wqe = NULL; bool loopback; int attr_mask; @@ -269,8 +274,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, } spin_lock_irqsave(&qp->sq.lock, flags); - ind = qp->sq_next_wqe; - sge_ind = qp->next_sge; + sge_idx = qp->next_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { @@ -279,6 +283,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, goto out; } + wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); + if (unlikely(wr->num_sge > qp->sq.max_gs)) { dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, qp->sq.max_gs); @@ -287,14 +293,20 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, goto out; } - wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); - qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = - wr->wr_id; - + wqe = get_send_wqe(qp, wqe_idx); + qp->sq.wrid[wqe_idx] = wr->wr_id; owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + valid_num_sge = 0; tmp_len = 0; + for (i = 0; i < wr->num_sge; i++) { + if (likely(wr->sg_list[i].length)) { + tmp_len += wr->sg_list[i].length; + valid_num_sge++; + } + } + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { ud_sq_wqe = wqe; @@ -330,9 +342,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, V2_UD_SEND_WQE_BYTE_4_OPCODE_S, HNS_ROCE_V2_WQE_OP_SEND); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; - ud_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len); @@ -368,12 +377,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); roce_set_field(ud_sq_wqe->byte_20, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - sge_ind & (qp->sge.sge_cnt - 1)); + sge_idx & (qp->sge.sge_cnt - 1)); roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, @@ -423,13 +432,10 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); - set_extend_sge(qp, wr, &sge_ind); - ind++; + set_extend_sge(qp, wr, &sge_idx, valid_num_sge); } else if (ibqp->qp_type == IB_QPT_RC) { rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; rc_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len); @@ -550,15 +556,14 @@ static int 
hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); } else if (wr->opcode != IB_WR_REG_MR) { ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, - wqe, &sge_ind, bad_wr); + wqe, &sge_idx, + valid_num_sge, bad_wr); if (ret) goto out; } - - ind++; } else { dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -588,8 +593,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, hns_roce_write64(hr_dev, (__le32 *)&sq_db, qp->sq.db_reg_l); - qp->sq_next_wqe = ind; - qp->next_sge = sge_ind; + qp->next_sge = sge_idx; if (qp->state == IB_QPS_ERR) { attr_mask = IB_QP_STATE; @@ -623,13 +627,12 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; void *wqe = NULL; int attr_mask; + u32 wqe_idx; int ret = 0; int nreq; - int ind; int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); - ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); if (hr_qp->state == IB_QPS_RESET) { spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -645,6 +648,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); @@ -653,7 +658,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe = get_recv_wqe(hr_qp, ind); + wqe = get_recv_wqe(hr_qp, wqe_idx); dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; for (i = 0; i < wr->num_sge; i++) { if (!wr->sg_list[i].length) @@ -669,8 +674,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, /* rq support inline data */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { - sge_list = hr_qp->rq_inl_buf.wqe_list[ind].sg_list; - hr_qp->rq_inl_buf.wqe_list[ind].sge_cnt = + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; for (i = 0; i < wr->num_sge; i++) { sge_list[i].addr = @@ -679,9 +684,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, } } - hr_qp->rq.wrid[ind] = wr->wr_id; - - ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: @@ -4464,7 +4467,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->rq.tail = 0; hr_qp->sq.head = 0; hr_qp->sq.tail = 0; - hr_qp->sq_next_wqe = 0; hr_qp->next_sge = 0; if (hr_qp->rq.wqe_cnt) *hr_qp->rdb.db_record = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 9ad19170c3f9..95765560c1cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1064,8 +1064,8 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) { if (page_addr & ((1 << mtt->page_shift) - 1)) { dev_err(dev, - "page_addr 0x%llx is not page_shift %d alignment!\n", - page_addr, mtt->page_shift); + "page_addr is not page_shift %d alignment!\n", + mtt->page_shift); ret = -EINVAL; goto out; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index d44cf33df81a..238614370927 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1225,6 +1225,8 @@ static void i40iw_add_ipv4_addr(struct i40iw_device *iwdev) const struct in_ifaddr *ifa; idev = in_dev_get(dev); + if (!idev) + continue; 
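in_dev_get() returns NULL for a netdev with no IPv4 configuration, and on success it takes a reference that the caller must drop with in_dev_put(); dereferencing the result without the NULL check is the crash this hunk avoids. A sketch of the guarded iteration, assuming RTNL is held as the in_dev_for_each_ifa_rtnl() accessor implies; process_ifa() is a hypothetical consumer:

    struct net_device *dev;
    const struct in_ifaddr *ifa;

    for_each_netdev(&init_net, dev) {
            struct in_device *idev = in_dev_get(dev);

            if (!idev)                      /* no IPv4 addresses configured */
                    continue;
            in_dev_for_each_ifa_rtnl(ifa, idev)
                    process_ifa(ifa);       /* hypothetical consumer */
            in_dev_put(idev);               /* pairs with in_dev_get() */
    }
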
in_dev_for_each_ifa_rtnl(ifa, idev) { i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, "IP=%pI4, vlan_id=%d, MAC=%pM\n", &ifa->ifa_address, diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index ecd6cadd529a..b591861934b3 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -186,23 +186,6 @@ static void id_map_ent_timeout(struct work_struct *work) kfree(ent); } -static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id) -{ - struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; - struct rb_root *sl_id_map = &sriov->sl_id_map; - struct id_map_entry *ent, *found_ent; - - spin_lock(&sriov->id_map_lock); - ent = xa_erase(&sriov->pv_id_table, pv_cm_id); - if (!ent) - goto out; - found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id); - if (found_ent && found_ent == ent) - rb_erase(&found_ent->node, sl_id_map); -out: - spin_unlock(&sriov->id_map_lock); -} - static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new) { struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map; @@ -294,7 +277,7 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) spin_lock(&sriov->id_map_lock); spin_lock_irqsave(&sriov->going_down_lock, flags); /*make sure that there is no schedule inside the scheduled work.*/ - if (!sriov->is_going_down) { + if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } @@ -341,9 +324,6 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) - id_map_find_del(ibdev, pv_cm_id); - return 0; } @@ -382,12 +362,9 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, *slave = id->slave_id; set_remote_comm_id(mad, id->sl_cm_id); - if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID) + if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID || + mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) schedule_delayed(ibdev, id); - else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID || - mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) { - id_map_find_del(ibdev, (int) pv_cm_id); - } return 0; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 34055cbab38c..2f5d9b181848 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -246,6 +246,13 @@ static int mlx4_ib_update_gids(struct gid_entry *gids, return mlx4_ib_update_gids_v1(gids, ibdev, port_num); } +static void free_gid_entry(struct gid_entry *entry) +{ + memset(&entry->gid, 0, sizeof(entry->gid)); + kfree(entry->ctx); + entry->ctx = NULL; +} + static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) { struct mlx4_ib_dev *ibdev = to_mdev(attr->device); @@ -313,6 +320,8 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) GFP_ATOMIC); if (!gids) { ret = -ENOMEM; + *context = NULL; + free_gid_entry(&port_gid_table->gids[free]); } else { for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) { memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); @@ -324,6 +333,12 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) if (!ret && hw_update) { ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); + if (ret) { + spin_lock_bh(&iboe->lock); + *context = NULL; + free_gid_entry(&port_gid_table->gids[free]); + spin_unlock_bh(&iboe->lock); + } kfree(gids); } @@ 
-353,10 +368,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) if (!ctx->refcount) { unsigned int real_index = ctx->real_index; - memset(&port_gid_table->gids[real_index].gid, 0, - sizeof(port_gid_table->gids[real_index].gid)); - kfree(port_gid_table->gids[real_index].ctx); - port_gid_table->gids[real_index].ctx = NULL; + free_gid_entry(&port_gid_table->gids[real_index]); hw_update = 1; } } diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index ac4d8d1b9a07..1ae6fd95acaa 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index 4f0edd4832bd..b61165359954 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -164,8 +164,10 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID; in->node_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); - if (!err) + if (!err) { vfs_ctx[vf].node_guid = guid; + vfs_ctx[vf].node_guid_valid = 1; + } kfree(in); return err; } @@ -185,8 +187,10 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid) in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID; in->port_guid = guid; err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in); - if (!err) + if (!err) { vfs_ctx[vf].port_guid = guid; + vfs_ctx[vf].port_guid_valid = 1; + } kfree(in); return err; } @@ -208,20 +212,12 @@ int mlx5_ib_get_vf_guid(struct ib_device *device, int vf, u8 port, { struct mlx5_ib_dev *dev = to_mdev(device); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_hca_vport_context *rep; - int err; - - rep = kzalloc(sizeof(*rep), GFP_KERNEL); - if (!rep) - return -ENOMEM; + struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx; - err = mlx5_query_hca_vport_context(mdev, 1, 1, vf+1, rep); - if (err) - goto ex; + node_guid->guid = + vfs_ctx[vf].node_guid_valid ? vfs_ctx[vf].node_guid : 0; + port_guid->guid = + vfs_ctx[vf].port_guid_valid ? vfs_ctx[vf].port_guid : 0; - port_guid->guid = rep->port_guid; - node_guid->guid = rep->node_guid; -ex: - kfree(rep); - return err; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 997cbfe4b90c..760630c7aae7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -815,6 +815,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { + size_t uhw_outlen = (uhw) ? 
uhw->outlen : 0; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; @@ -828,12 +829,12 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, u64 max_tso; resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); - if (uhw->outlen && uhw->outlen < resp_len) + if (uhw_outlen && uhw_outlen < resp_len) return -EINVAL; resp.response_length = resp_len; - if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) + if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) return -EINVAL; memset(props, 0, sizeof(*props)); @@ -897,7 +898,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->raw_packet_caps |= IB_RAW_PACKET_CAP_CVLAN_STRIPPING; - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) { max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); if (max_tso) { resp.tso_caps.max_tso = 1 << max_tso; @@ -907,7 +908,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) { resp.rss_caps.rx_hash_function = MLX5_RX_HASH_FUNC_TOEPLITZ; resp.rss_caps.rx_hash_fields_mask = @@ -927,9 +928,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.rss_caps); } } else { - if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + if (field_avail(typeof(resp), tso_caps, uhw_outlen)) resp.response_length += sizeof(resp.tso_caps); - if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + if (field_avail(typeof(resp), rss_caps, uhw_outlen)) resp.response_length += sizeof(resp.rss_caps); } @@ -1054,7 +1055,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_MAX_CQ_PERIOD; } - if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + if (field_avail(typeof(resp), cqe_comp_caps, uhw_outlen)) { resp.response_length += sizeof(resp.cqe_comp_caps); if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { @@ -1072,7 +1073,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) && + if (field_avail(typeof(resp), packet_pacing_caps, uhw_outlen) && raw_support) { if (MLX5_CAP_QOS(mdev, packet_pacing) && MLX5_CAP_GEN(mdev, qos)) { @@ -1091,7 +1092,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, - uhw->outlen)) { + uhw_outlen)) { if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) resp.mlx5_ib_support_multi_pkt_send_wqes = MLX5_IB_ALLOW_MPW; @@ -1104,7 +1105,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); } - if (field_avail(typeof(resp), flags, uhw->outlen)) { + if (field_avail(typeof(resp), flags, uhw_outlen)) { resp.response_length += sizeof(resp.flags); if (MLX5_CAP_GEN(mdev, cqe_compression_128)) @@ -1120,8 +1121,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; } - if (field_avail(typeof(resp), sw_parsing_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), sw_parsing_caps, uhw_outlen)) { resp.response_length += sizeof(resp.sw_parsing_caps); if (MLX5_CAP_ETH(mdev, swp)) { resp.sw_parsing_caps.sw_parsing_offloads |= @@ -1141,7 +1141,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) && + if (field_avail(typeof(resp), striding_rq_caps, uhw_outlen) && 
raw_support) { resp.response_length += sizeof(resp.striding_rq_caps); if (MLX5_CAP_GEN(mdev, striding_rq)) { @@ -1164,8 +1164,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } } - if (field_avail(typeof(resp), tunnel_offloads_caps, - uhw->outlen)) { + if (field_avail(typeof(resp), tunnel_offloads_caps, uhw_outlen)) { resp.response_length += sizeof(resp.tunnel_offloads_caps); if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) resp.tunnel_offloads_caps |= @@ -1186,7 +1185,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } - if (uhw->outlen) { + if (uhw_outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); if (err) @@ -4771,7 +4770,6 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; - struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); if (!pprops) @@ -4781,7 +4779,7 @@ static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) if (!dprops) goto out; - err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); + err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); if (err) { mlx5_ib_warn(dev, "query_device failed %d\n", err); goto out; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b06f32ff5748..b3561e4c44e8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -629,6 +629,7 @@ struct mlx5_ib_mr { /* For ODP and implicit */ atomic_t num_deferred_work; + wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { struct rcu_head rcu; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index ea8bfc3e2d8d..23c4529edf54 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1247,6 +1247,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && length == U64_MAX) { + if (virt_addr != start) + return ERR_PTR(-EINVAL); if (!(access_flags & IB_ACCESS_ON_DEMAND) || !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) return ERR_PTR(-EINVAL); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index f924250f80c2..443de6fb578b 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -197,7 +197,8 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr->dev, mr); ib_umem_odp_release(odp); - atomic_dec(&imr->num_deferred_work); + if (atomic_dec_and_test(&imr->num_deferred_work)) + wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) @@ -516,6 +517,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; atomic_set(&imr->num_deferred_work, 0); + init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -573,10 +575,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) * under xa_lock while the child is in the xarray. Thus at this point * it is only decreasing, and all work holding it is now on the wq. 
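The odp.c hunks nearby replace flushing the whole system_unbound_wq (plus a WARN_ON) with a sleep on a per-MR waitqueue that is woken when num_deferred_work drops to zero. A minimal sketch of that counter-plus-waitqueue drain pattern follows; the struct and helper names are illustrative, not the driver's real API:

#include <linux/atomic.h>
#include <linux/wait.h>

struct deferred_owner {
	atomic_t num_deferred_work;
	wait_queue_head_t q_deferred_work;
};

static void deferred_owner_init(struct deferred_owner *o)
{
	atomic_set(&o->num_deferred_work, 0);
	init_waitqueue_head(&o->q_deferred_work);
}

/* Each deferred work item takes a reference before it is queued. */
static void deferred_get(struct deferred_owner *o)
{
	atomic_inc(&o->num_deferred_work);
}

/* The reference is dropped on completion; only the final put wakes
 * the waiter, so the wakeup stays cheap in the common case. */
static void deferred_put(struct deferred_owner *o)
{
	if (atomic_dec_and_test(&o->num_deferred_work))
		wake_up(&o->q_deferred_work);
}

/* Teardown: sleep until every outstanding item has completed, without
 * flushing unrelated work on a shared workqueue. */
static void deferred_drain(struct deferred_owner *o)
{
	wait_event(o->q_deferred_work,
		   !atomic_read(&o->num_deferred_work));
}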
*/ - if (atomic_read(&imr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&imr->num_deferred_work)); - } + wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); /* * Fence the imr before we destroy the children. This allows us to @@ -607,10 +606,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) /* Wait for all running page-fault handlers to finish. */ synchronize_srcu(&mr->dev->odp_srcu); - if (atomic_read(&mr->num_deferred_work)) { - flush_workqueue(system_unbound_wq); - WARN_ON(atomic_read(&mr->num_deferred_work)); - } + wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); dma_fence_odp_mr(mr); } @@ -624,11 +620,10 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; unsigned long current_seq; u64 access_mask; - u64 start_idx, page_mask; + u64 start_idx; page_shift = odp->page_shift; - page_mask = ~(BIT(page_shift) - 1); - start_idx = (user_va - (mr->mmkey.iova & page_mask)) >> page_shift; + start_idx = (user_va - ib_umem_start(odp)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; if (odp->umem.writable && !downgrade) @@ -767,11 +762,19 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + if (unlikely(io_virt < mr->mmkey.iova)) + return -EFAULT; + if (!odp->is_implicit_odp) { - if (unlikely(io_virt < ib_umem_start(odp) || - ib_umem_end(odp) - io_virt < bcnt)) + u64 user_va; + + if (check_add_overflow(io_virt - mr->mmkey.iova, + (u64)odp->umem.address, &user_va)) + return -EFAULT; + if (unlikely(user_va >= ib_umem_end(odp) || + ib_umem_end(odp) - user_va < bcnt)) return -EFAULT; - return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped, + return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped, flags); } return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped, @@ -1675,7 +1678,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - atomic_dec(&work->frags[i].mr->num_deferred_work); + if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) + wake_up(&work->frags[i].mr->q_deferred_work); kvfree(work); } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7e51870e9e01..89ba2f6cd815 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3394,9 +3394,6 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, struct mlx5_ib_qp_base *base; u32 set_id; - if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) - return 0; - if (counter) set_id = counter->id; else @@ -6529,6 +6526,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) */ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) { + struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); int err = 0; @@ -6538,6 +6536,11 @@ int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter) goto out; } + if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) { + err = -EOPNOTSUPP; + goto out; + } + if (mqp->state == IB_QPS_RTS) { err = __mlx5_ib_qp_set_counter(qp, counter); if (!err) diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 33778d451b82..5ef93f8f17a1 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -329,8 +329,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == 
NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 3cdf75d0c7a4..7858d499db03 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -61,6 +61,8 @@ #define RVT_RWQ_COUNT_THRESHOLD 16 static void rvt_rc_timeout(struct timer_list *t); +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); /* * Convert the AETH RNR timeout code into the number of microseconds. @@ -452,40 +454,41 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) } /** - * free_all_qps - check for QPs still in use + * rvt_free_qp_cb - callback function to reset a qp + * @qp: the qp to reset + * @v: a 64-bit value + * + * This function resets the qp and removes it from the + * qp hash table. + */ +static void rvt_free_qp_cb(struct rvt_qp *qp, u64 v) +{ + unsigned int *qp_inuse = (unsigned int *)v; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* Reset the qp and remove it from the qp hash list */ + rvt_reset_qp(rdi, qp, qp->ibqp.qp_type); + + /* Increment the qp_inuse count */ + (*qp_inuse)++; +} + +/** + * rvt_free_all_qps - check for QPs still in use * @rdi: rvt device info structure * * There should not be any QPs still in use. * Free memory for table. + * Return the number of QPs still in use. */ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { - unsigned long flags; - struct rvt_qp *qp; - unsigned n, qp_inuse = 0; - spinlock_t *ql; /* work around too long line below */ - - if (rdi->driver_f.free_all_qps) - qp_inuse = rdi->driver_f.free_all_qps(rdi); + unsigned int qp_inuse = 0; qp_inuse += rvt_mcast_tree_empty(rdi); - if (!rdi->qp_dev) - return qp_inuse; - - ql = &rdi->qp_dev->qpt_lock; - spin_lock_irqsave(ql, flags); - for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { - qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], - lockdep_is_held(ql)); - RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); + rvt_qp_iter(rdi, (u64)&qp_inuse, rvt_free_qp_cb); - for (; qp; qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql))) - qp_inuse++; - } - spin_unlock_irqrestore(ql, flags); - synchronize_rcu(); return qp_inuse; } @@ -902,14 +905,14 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** - * rvt_reset_qp - initialize the QP state to the reset state + * _rvt_reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type * * r_lock, s_hlock, and s_lock are required to be held by the caller */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) +static void _rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) __must_hold(&qp->s_lock) __must_hold(&qp->s_hlock) __must_hold(&qp->r_lock) @@ -955,6 +958,27 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, lockdep_assert_held(&qp->s_lock); } +/** + * rvt_reset_qp - initialize the QP state to the reset state + * @rdi: the device info + * @qp: the QP to reset + * @type: the QP type + * + * This is the wrapper function to acquire the r_lock, s_hlock, and s_lock + * before calling _rvt_reset_qp(). 
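The rvt_free_all_qps() rework above drops the open-coded walk of the QP hash table in favour of rvt_qp_iter() with a callback, passing the in-use counter through the opaque u64 argument. A reduced sketch of that callback convention; the iterator body and all names here are hypothetical:

#include <linux/types.h>

struct qp { int dummy; };

/* Hypothetical iterator: invokes cb(qp, v) for every live QP; the
 * actual table walk and locking are elided in this sketch. */
static void qp_iter(u64 v, void (*cb)(struct qp *qp, u64 v))
{
	/* for each qp in the table: cb(qp, v); */
}

/* Callback: recover the counter pointer from the opaque argument. */
static void count_cb(struct qp *qp, u64 v)
{
	unsigned int *qp_inuse = (unsigned int *)v;

	(*qp_inuse)++;
}

static unsigned int count_qps_in_use(void)
{
	unsigned int qp_inuse = 0;

	qp_iter((u64)&qp_inuse, count_cb);
	return qp_inuse;
}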
+ */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) +{ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + _rvt_reset_qp(rdi, qp, type); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); +} + /** rvt_free_qpn - Free a qpn from the bit map * @qpt: QP table * @qpn: queue pair number to free @@ -1546,7 +1570,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, switch (new_state) { case IB_QPS_RESET: if (qp->state != IB_QPS_RESET) - rvt_reset_qp(rdi, qp, ibqp->qp_type); + _rvt_reset_qp(rdi, qp, ibqp->qp_type); break; case IB_QPS_RTR: @@ -1695,13 +1719,7 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); rvt_reset_qp(rdi, qp, ibqp->qp_type); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); wait_event(qp->wait, !atomic_read(&qp->refcount)); /* qpn is now available for use again */ diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 116cafc9afcf..4bc88708b355 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -329,7 +329,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } return COMPST_ERROR_RETRY; @@ -463,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } @@ -479,7 +479,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } } @@ -725,7 +725,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 1); + rxe_run_task(&qp->req.task, 0); } if (pkt) { diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 95834206c80c..92de39c4a7c1 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -408,7 +408,7 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 3bccfef40e7e..ac86363ce1a2 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1225,10 +1225,9 @@ static void siw_cm_llp_data_ready(struct sock *sk) read_lock(&sk->sk_callback_lock); cep = sk_to_cep(sk); - if (!cep) { - WARN_ON(1); + if (!cep) goto out; - } + siw_dbg_cep(cep, "state: %d\n", cep->state); switch (cep->state) { diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index c147f0613d95..1e2fdd21ba6e 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -388,6 +388,9 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { .max_segment_size = SZ_2G }; base_dev->num_comp_vectors = num_possible_cpus(); + 
xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); + ib_set_device_ops(base_dev, &siw_device_ops); rv = ib_device_set_netdev(base_dev, netdev, 1); if (rv) @@ -415,9 +418,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) sdev->attrs.max_srq_wr = SIW_MAX_SRQ_WR; sdev->attrs.max_srq_sge = SIW_MAX_SGE; - xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); - xa_init_flags(&sdev->mem_xa, XA_FLAGS_ALLOC1); - INIT_LIST_HEAD(&sdev->cep_list); INIT_LIST_HEAD(&sdev->qp_list); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b273e421e910..a1a035270cab 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2575,6 +2575,17 @@ isert_wait4logout(struct isert_conn *isert_conn) } } +static void +isert_wait4cmds(struct iscsi_conn *conn) +{ + isert_info("iscsi_conn %p\n", conn); + + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); + } +} + /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicitate dataout @@ -2622,6 +2633,7 @@ static void isert_wait_conn(struct iscsi_conn *conn) ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); + isert_wait4cmds(conn); isert_wait4logout(isert_conn); queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index b7f7a5f7bd98..cd1181c39ed2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2546,7 +2546,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, if (lrsp->opcode == SRP_LOGIN_RSP) { ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); - ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; + ch->use_imm_data = srp_use_imm_data && + (lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP); ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, ch->use_imm_data, target->max_it_iu_size); diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 1ae6f8bba9ae..2c666fb34625 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -146,7 +146,6 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0042", /* Yoga */ "LEN0045", "LEN0047", - "LEN0049", "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ @@ -166,9 +165,11 @@ static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ + "LEN0049", /* Yoga 11e */ "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ + "LEN006c", /* T470s */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ @@ -179,6 +180,7 @@ static const char * const smbus_pnp_ids[] = { "LEN0097", /* X280 -> ALPS trackpoint */ "LEN009b", /* T580 */ "LEN200f", /* T450s */ + "LEN2044", /* L470 */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index d61731c0037d..b87b1e074f62 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1050,6 +1050,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, { const struct edt_i2c_chip_data *chip_data; struct 
edt_ft5x06_ts_data *tsdata; + u8 buf[2] = { 0xfc, 0x00 }; struct input_dev *input; unsigned long irq_flags; int error; @@ -1140,6 +1141,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, return error; } + /* + * Dummy read access. EP0700MLP1 returns bogus data on the first + * register read access and ignores writes. + */ + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf); + edt_ft5x06_ts_set_regs(tsdata); edt_ft5x06_ts_get_defaults(&client->dev, tsdata); edt_ft5x06_ts_get_parameters(tsdata); diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index 4a17096e83e1..84bf51d79888 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -321,7 +321,7 @@ static umode_t ili210x_calibrate_visible(struct kobject *kobj, struct i2c_client *client = to_i2c_client(dev); struct ili210x *priv = i2c_get_clientdata(client); - return priv->chip->has_calibrate_reg; + return priv->chip->has_calibrate_reg ? attr->mode : 0; } static const struct attribute_group ili210x_attr_group = { diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index bd25674ee4db..025a7ccd6a64 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -230,11 +230,8 @@ static struct pci_dev *setup_aliases(struct device *dev) */ ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)]; if (ivrs_alias != pci_dev_id(pdev) && - PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { - pci_add_dma_alias(pdev, ivrs_alias & 0xff); - pci_info(pdev, "Added PCI DMA alias %02x.%d\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias)); - } + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) + pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); clone_aliases(pdev); @@ -3839,7 +3836,7 @@ int amd_iommu_activate_guest_mode(void *data) entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; return modify_irte_ga(ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry, NULL); + ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_activate_guest_mode); @@ -3865,7 +3862,7 @@ int amd_iommu_deactivate_guest_mode(void *data) APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); return modify_irte_ga(ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry, NULL); + ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 483f7bc379fa..b5ae9f7c0510 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -147,7 +147,7 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; -static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; +static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; static bool amd_iommu_detected; static bool __initdata amd_iommu_disabled; @@ -1523,8 +1523,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; - if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0)) - amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; break; case 0x11: case 0x40: @@ -1534,8 +1532,15 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; - if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0)) - 
amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; + /* + * Note: Since iommu_update_intcapxt() leverages + * the IOMMU MMIO access to MSI capability block registers + * for MSI address lo/hi/data, we need to check both + * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support. + */ + if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) && + (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT))) + amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; break; default: return -EINVAL; @@ -1996,8 +2001,8 @@ static int iommu_init_intcapxt(struct amd_iommu *iommu) struct irq_affinity_notify *notify = &iommu->intcapxt_notify; /** - * IntCapXT requires XTSup=1, which can be inferred - * amd_iommu_xt_mode. + * IntCapXT requires XTSup=1 and MsiCapMmioSup=1, + * which can be inferred from amd_iommu_xt_mode. */ if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) return 0; @@ -2528,6 +2533,7 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; acpi_status status; int i, remap_cache_sz, ret = 0; + u32 pci_id; if (!amd_iommu_detected) return -ENODEV; @@ -2615,6 +2621,16 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + /* Disable IOMMU if there's Stoney Ridge graphics */ + for (i = 0; i < 32; i++) { + pci_id = read_pci_config(0, i, 0, 0); + if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { + pr_info("Disable IOMMU on Stoney Ridge\n"); + amd_iommu_disabled = true; + break; + } + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -2723,7 +2739,7 @@ static int __init state_next(void) ret = early_amd_iommu_init(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD IOMMU disabled on kernel command-line\n"); + pr_info("AMD IOMMU disabled\n"); init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f52f59d5c6bd..798e1533a147 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -377,12 +377,12 @@ #define IOMMU_CAP_EFR 27 /* IOMMU Feature Reporting Field (for IVHD type 10h */ -#define IOMMU_FEAT_XTSUP_SHIFT 0 #define IOMMU_FEAT_GASUP_SHIFT 6 /* IOMMU Extended Feature Register (EFR) */ #define IOMMU_EFR_XTSUP_SHIFT 2 #define IOMMU_EFR_GASUP_SHIFT 7 +#define IOMMU_EFR_MSICAPMMIOSUP_SHIFT 46 #define MAX_DOMAIN_ID 65536 diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index effe72eb89e7..6bd6a3f3f471 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -856,6 +856,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; @@ -1642,7 +1643,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_1_EATS_TRANS)); arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a2e96a5fd9a7..ba128d1cdaee 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ 
-177,15 +177,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 3acfa6a25fa2..93f8e646cb0b 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -128,6 +129,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) BUG_ON(dev->is_virtfn); + /* + * Ignore devices that have a domain number higher than what can + * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 + */ + if (pci_domain_nr(dev->bus) > U16_MAX) + return NULL; + /* Only generate path[] for device addition event */ if (event == BUS_NOTIFY_ADD_DEVICE) for (tmp = dev; tmp; tmp = tmp->bus->self) @@ -440,12 +448,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, /* Check for NUL termination within the designated length */ if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; ANDD object name is not NUL-terminated\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -EINVAL; } pr_info("ANDD device: %x name: %s\n", andd->device_number, @@ -471,14 +480,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) return 0; } } - WARN_TAINT( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, + rhsa->base_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return 0; } @@ -827,14 +836,14 @@ int __init dmar_table_init(void) static void warn_invalid_dmar(u64 addr, const char *message) { - WARN_TAINT_ONCE( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn_once(FW_BUG "Your BIOS is broken; DMAR reported at address %llx%s!\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", addr, message, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); } static int __ref @@ -1354,7 +1363,6 @@ void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid, struct qi_desc desc; if (mask) { - WARN_ON_ONCE(addr & ((1ULL << (VTD_PAGE_SHIFT + mask)) - 1)); addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE; } else diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 932267f49f9a..10176d8ea3e6 100644 --- a/drivers/iommu/intel-iommu.c 
+++ b/drivers/iommu/intel-iommu.c @@ -732,6 +732,11 @@ static int iommu_dummy(struct device *dev) return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } +static bool attach_deferred(struct device *dev) +{ + return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; +} + /** * is_downstream_to_pci_bridge - test if a device belongs to the PCI * sub-hierarchy of a candidate PCI-PCI bridge @@ -2424,8 +2429,7 @@ static struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; - if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO || - dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)) + if (unlikely(attach_deferred(dev) || iommu_dummy(dev))) return NULL; /* No lock here, assumes no domain exit in normal case */ @@ -2436,18 +2440,14 @@ static struct dmar_domain *find_domain(struct device *dev) return NULL; } -static struct dmar_domain *deferred_attach_domain(struct device *dev) +static void do_deferred_attach(struct device *dev) { - if (unlikely(dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO)) { - struct iommu_domain *domain; - - dev->archdata.iommu = NULL; - domain = iommu_get_domain_for_dev(dev); - if (domain) - intel_iommu_attach_device(domain, dev); - } + struct iommu_domain *domain; - return find_domain(dev); + dev->archdata.iommu = NULL; + domain = iommu_get_domain_for_dev(dev); + if (domain) + intel_iommu_attach_device(domain, dev); } static inline struct device_domain_info * @@ -2799,7 +2799,7 @@ static int identity_mapping(struct device *dev) struct device_domain_info *info; info = dev->archdata.iommu; - if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO) + if (info) return (info->domain == si_domain); return 0; @@ -3406,7 +3406,8 @@ static unsigned long intel_alloc_iova(struct device *dev, iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { - dev_err(dev, "Allocating %ld-page iova failed", nrpages); + dev_err_once(dev, "Allocating %ld-page iova failed\n", + nrpages); return 0; } @@ -3469,6 +3470,9 @@ static bool iommu_need_mapping(struct device *dev) if (iommu_dummy(dev)) return false; + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + ret = identity_mapping(dev); if (ret) { u64 dma_mask = *dev->dma_mask; @@ -3517,7 +3521,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - domain = deferred_attach_domain(dev); + domain = find_domain(dev); if (!domain) return DMA_MAPPING_ERROR; @@ -3737,7 +3741,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele if (!iommu_need_mapping(dev)) return dma_direct_map_sg(dev, sglist, nelems, dir, attrs); - domain = deferred_attach_domain(dev); + domain = find_domain(dev); if (!domain) return 0; @@ -3832,7 +3836,11 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, int prot = 0; int ret; - domain = deferred_attach_domain(dev); + if (unlikely(attach_deferred(dev))) + do_deferred_attach(dev); + + domain = find_domain(dev); + if (WARN_ON(dir == DMA_NONE || !domain)) return DMA_MAPPING_ERROR; @@ -4133,10 +4141,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) /* we know that the this iommu should be at offset 0xa000 from vtbar */ drhd = dmar_find_matched_drhd_unit(pdev); - if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, - TAINT_FIRMWARE_WORKAROUND, - "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) 
{ + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); @@ -4319,12 +4328,18 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; struct dmar_rmrr_unit *rmrru; - int ret; rmrr = (struct acpi_dmar_reserved_memory *)header; - ret = arch_rmrr_sanity_check(rmrr); - if (ret) - return ret; + if (arch_rmrr_sanity_check(rmrr)) { + pr_warn(FW_BUG + "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + rmrr->base_address, rmrr->end_address, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + } rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) @@ -5053,6 +5068,7 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); + down_read(&dmar_global_lock); for_each_active_iommu(iommu, drhd) { iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, @@ -5060,6 +5076,7 @@ int __init intel_iommu_init(void) iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops); iommu_device_register(&iommu->iommu); } + up_read(&dmar_global_lock); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) @@ -5070,7 +5087,6 @@ int __init intel_iommu_init(void) down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); - up_read(&dmar_global_lock); /* Finally, we enable the DMA remapping hardware. 
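A pattern repeated across the dmar.c and intel-iommu.c hunks above: WARN_TAINT*() calls, which dump a stack trace, become a plain FW_BUG warning followed by an explicit taint, since the backtrace points at the kernel while the fault lies in firmware. A hedged sketch of the resulting idiom, with an illustrative message:

#include <linux/kernel.h>
#include <linux/printk.h>

static void report_broken_firmware(u64 addr)
{
	/* No backtrace: the problem is in the BIOS tables, not in the
	 * kernel call chain, so a stack dump adds nothing. */
	pr_warn_once(FW_BUG "DMAR reported at address %llx!\n", addr);

	/* Still taint the kernel so the firmware workaround remains
	 * visible in later bug reports. */
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
}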
*/ for_each_iommu(iommu, drhd) { @@ -5079,6 +5095,8 @@ int __init intel_iommu_init(void) iommu_disable_protect_mem_regions(iommu); } + up_read(&dmar_global_lock); + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); intel_iommu_enabled = 1; @@ -5553,8 +5571,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, u64 phys = 0; pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); - if (pte) - phys = dma_pte_addr(pte); + if (pte && dma_pte_present(pte)) + phys = dma_pte_addr(pte) + + (iova & (BIT_MASK(level_to_offset_bits(level) + + VTD_PAGE_SHIFT) - 1)); return phys; } @@ -5984,7 +6004,7 @@ intel_iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev) static bool intel_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev) { - return dev->archdata.iommu == DEFER_DEVICE_DOMAIN_INFO; + return attach_deferred(dev); } const struct iommu_ops intel_iommu_ops = { diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 040a445be300..e7cb0b8a7332 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -499,8 +499,16 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, } #ifdef CONFIG_X86 - if (cpu_feature_enabled(X86_FEATURE_LA57)) - pasid_set_flpm(pte, 1); + /* Both CPU and IOMMU paging mode need to match */ + if (cpu_feature_enabled(X86_FEATURE_LA57)) { + if (cap_5lp_support(iommu->cap)) { + pasid_set_flpm(pte, 1); + } else { + pr_err("VT-d has no 5-level paging support for CPU\n"); + pasid_clear_entry(pte); + return -EINVAL; + } + } #endif /* CONFIG_X86 */ pasid_set_domain_id(pte, did); diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index dca88f9fdf29..518d0b2d12af 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -317,7 +317,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ /* Do not use PASID 0 in caching mode (virtualised IOMMU) */ ret = intel_pasid_alloc_id(svm, !!cap_caching_mode(iommu->cap), - pasid_max - 1, GFP_KERNEL); + pasid_max, GFP_KERNEL); if (ret < 0) { kfree(svm); kfree(sdev); @@ -654,11 +654,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (req->priv_data_present) memcpy(&resp.qw2, req->priv_data, sizeof(req->priv_data)); + resp.qw2 = 0; + resp.qw3 = 0; + qi_submit_sync(&resp, iommu); } - resp.qw2 = 0; - resp.qw3 = 0; - qi_submit_sync(&resp, iommu); - head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index c7a914b9bbbc..0e6a9536eca6 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -233,7 +233,7 @@ static DEFINE_MUTEX(iova_cache_mutex); struct iova *alloc_iova_mem(void) { - return kmem_cache_zalloc(iova_cache, GFP_ATOMIC); + return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN); } EXPORT_SYMBOL(alloc_iova_mem); diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 52f38292df5b..c3de46acf50a 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -345,21 +345,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ - return; - iommu_put_dma_cookie(domain); - /* NOTE: unmap can be called after client device is powered off, - * for example, with GPUs or anything involving dma-buf. So we - * cannot rely on the device_link. 
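The iova.c one-liner above adds __GFP_NOWARN to an atomic slab allocation: GFP_ATOMIC failures under memory pressure are expected and handled by the caller, so the default allocation-failure splat is pure noise. A sketch of the idiom, assuming an illustrative cache created elsewhere:

#include <linux/slab.h>

static struct kmem_cache *example_cache;	/* created at init time */

static void *alloc_entry_atomic(void)
{
	/* May return NULL under pressure; the caller copes with that,
	 * so suppress the allocation-failure warning. */
	return kmem_cache_zalloc(example_cache, GFP_ATOMIC | __GFP_NOWARN);
}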
Make sure the IOMMU is on to - * avoid unclocked accesses in the TLB inv path: - */ - pm_runtime_get_sync(qcom_domain->iommu->dev); - - free_io_pgtable_ops(qcom_domain->pgtbl_ops); - - pm_runtime_put_sync(qcom_domain->iommu->dev); + if (qcom_domain->iommu) { + /* + * NOTE: unmap can be called after client device is powered + * off, for example, with GPUs or anything involving dma-buf. + * So we cannot rely on the device_link. Make sure the IOMMU + * is on to avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + pm_runtime_put_sync(qcom_domain->iommu->dev); + } kfree(qcom_domain); } @@ -405,7 +403,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; - if (!qcom_domain->iommu) + if (WARN_ON(!qcom_domain->iommu)) return; pm_runtime_get_sync(qcom_iommu->dev); @@ -418,8 +416,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de ctx->domain = NULL; } pm_runtime_put_sync(qcom_iommu->dev); - - qcom_domain->iommu = NULL; } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e05673bcd52b..50f89056c16b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -598,7 +598,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its, struct its_cmd_desc *desc) { its_encode_cmd(cmd, GITS_CMD_INVALL); - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); its_fixup_cmd(cmd); @@ -1170,13 +1170,14 @@ static void its_send_vclear(struct its_device *dev, u32 event_id) */ static struct its_vlpi_map *get_vlpi_map(struct irq_data *d) { - struct its_device *its_dev = irq_data_get_irq_chip_data(d); - u32 event = its_get_event_id(d); + if (irqd_is_forwarded_to_vcpu(d)) { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); - if (!irqd_is_forwarded_to_vcpu(d)) - return NULL; + return dev_event_to_vlpi_map(its_dev, event); + } - return dev_event_to_vlpi_map(its_dev, event); + return NULL; } static void lpi_write_config(struct irq_data *d, u8 clr, u8 set) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d6218012097b..3f5baa5043db 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1821,6 +1821,7 @@ static struct struct redist_region *redist_regs; u32 nr_redist_regions; bool single_redist; + int enabled_rdists; u32 maint_irq; int maint_irq_mode; phys_addr_t vcpu_base; @@ -1915,8 +1916,10 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; return 0; + } /* * It's perfectly valid firmware can pass disabled GICC entry, driver @@ -1946,8 +1949,10 @@ static int __init gic_acpi_count_gicr_regions(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, gic_acpi_match_gicc, 0); - if (count > 0) + if (count > 0) { acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } return count; } diff --git a/drivers/irqchip/irq-mbigen.c 
b/drivers/irqchip/irq-mbigen.c index 3f09f658e8e2..6b566bba263b 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -374,6 +374,7 @@ static struct platform_driver mbigen_platform_driver = { .name = "Hisilicon MBIGEN-V2", .of_match_table = mbigen_of_match, .acpi_match_table = ACPI_PTR(mbigen_acpi_match), + .suppress_bind_attrs = true, }, .probe = mbigen_device_probe, }; diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c index 4afc317901a8..66cdc003b8f4 100644 --- a/drivers/leds/leds-pca963x.c +++ b/drivers/leds/leds-pca963x.c @@ -40,6 +40,8 @@ #define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ #define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ +#define PCA963X_MODE2_OUTDRV 0x04 /* Open-drain or totem pole */ +#define PCA963X_MODE2_INVRT 0x10 /* Normal or inverted direction */ #define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ #define PCA963X_MODE1 0x00 @@ -438,12 +440,12 @@ static int pca963x_probe(struct i2c_client *client, PCA963X_MODE2); /* Configure output: open-drain or totem pole (push-pull) */ if (pdata->outdrv == PCA963X_OPEN_DRAIN) - mode2 |= 0x01; + mode2 &= ~PCA963X_MODE2_OUTDRV; else - mode2 |= 0x05; + mode2 |= PCA963X_MODE2_OUTDRV; /* Configure direction: normal or inverted */ if (pdata->dir == PCA963X_INVERTED) - mode2 |= 0x10; + mode2 |= PCA963X_MODE2_INVRT; i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, mode2); } diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index 8c744578122a..a0d87ed9da69 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -300,9 +300,11 @@ static int control_loop(void *dummy) /* i2c probing and setup */ /************************************************************************/ -static int -do_attach( struct i2c_adapter *adapter ) +static void do_attach(struct i2c_adapter *adapter) { + struct i2c_board_info info = { }; + struct device_node *np; + /* scan 0x48-0x4f (DS1775) and 0x2c-2x2f (ADM1030) */ static const unsigned short scan_ds1775[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, @@ -313,25 +315,24 @@ do_attach( struct i2c_adapter *adapter ) I2C_CLIENT_END }; - if( strncmp(adapter->name, "uni-n", 5) ) - return 0; - - if( !x.running ) { - struct i2c_board_info info; + if (x.running || strncmp(adapter->name, "uni-n", 5)) + return; - memset(&info, 0, sizeof(struct i2c_board_info)); - strlcpy(info.type, "therm_ds1775", I2C_NAME_SIZE); + np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,ds1775"); + if (np) { + of_node_put(np); + } else { + strlcpy(info.type, "MAC,ds1775", I2C_NAME_SIZE); i2c_new_probed_device(adapter, &info, scan_ds1775, NULL); + } - strlcpy(info.type, "therm_adm1030", I2C_NAME_SIZE); + np = of_find_compatible_node(adapter->dev.of_node, NULL, "MAC,adm1030"); + if (np) { + of_node_put(np); + } else { + strlcpy(info.type, "MAC,adm1030", I2C_NAME_SIZE); i2c_new_probed_device(adapter, &info, scan_adm1030, NULL); - - if( x.thermostat && x.fan ) { - x.running = 1; - x.poll_task = kthread_run(control_loop, NULL, "g4fand"); - } } - return 0; } static int @@ -404,8 +405,8 @@ attach_thermostat( struct i2c_client *cl ) enum chip { ds1775, adm1030 }; static const struct i2c_device_id therm_windtunnel_id[] = { - { "therm_ds1775", ds1775 }, - { "therm_adm1030", adm1030 }, + { "MAC,ds1775", ds1775 }, + { "MAC,adm1030", adm1030 }, { } }; MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id); @@ -414,6 +415,7 @@ static int do_probe(struct i2c_client *cl, const struct 
i2c_device_id *id) { struct i2c_adapter *adapter = cl->adapter; + int ret = 0; if( !i2c_check_functionality(adapter, I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_WRITE_BYTE) ) @@ -421,11 +423,19 @@ do_probe(struct i2c_client *cl, const struct i2c_device_id *id) switch (id->driver_data) { case adm1030: - return attach_fan( cl ); + ret = attach_fan(cl); + break; case ds1775: - return attach_thermostat(cl); + ret = attach_thermostat(cl); + break; } - return 0; + + if (!x.running && x.thermostat && x.fan) { + x.running = 1; + x.poll_task = kthread_run(control_loop, NULL, "g4fand"); + } + + return ret; } static struct i2c_driver g4fan_driver = { diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c index 125605987b44..e7dec328c7cf 100644 --- a/drivers/macintosh/windfarm_ad7417_sensor.c +++ b/drivers/macintosh/windfarm_ad7417_sensor.c @@ -312,9 +312,16 @@ static const struct i2c_device_id wf_ad7417_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_ad7417_id); +static const struct of_device_id wf_ad7417_of_id[] = { + { .compatible = "ad7417", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_ad7417_of_id); + static struct i2c_driver wf_ad7417_driver = { .driver = { .name = "wf_ad7417", + .of_match_table = wf_ad7417_of_id, }, .probe = wf_ad7417_probe, .remove = wf_ad7417_remove, diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c index 67daeec94b44..2470e5a725c8 100644 --- a/drivers/macintosh/windfarm_fcu_controls.c +++ b/drivers/macintosh/windfarm_fcu_controls.c @@ -580,9 +580,16 @@ static const struct i2c_device_id wf_fcu_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_fcu_id); +static const struct of_device_id wf_fcu_of_id[] = { + { .compatible = "fcu", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_fcu_of_id); + static struct i2c_driver wf_fcu_driver = { .driver = { .name = "wf_fcu", + .of_match_table = wf_fcu_of_id, }, .probe = wf_fcu_probe, .remove = wf_fcu_remove, diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 282c28a17ea1..1e5fa09845e7 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -91,9 +92,14 @@ static int wf_lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct wf_lm75_sensor *lm; - int rc, ds1775 = id->driver_data; + int rc, ds1775; const char *name, *loc; + if (id) + ds1775 = id->driver_data; + else + ds1775 = !!of_device_get_match_data(&client->dev); + DBG("wf_lm75: creating %s device at address 0x%02x\n", ds1775 ? 
"ds1775" : "lm75", client->addr); @@ -164,9 +170,17 @@ static const struct i2c_device_id wf_lm75_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_lm75_id); +static const struct of_device_id wf_lm75_of_id[] = { + { .compatible = "lm75", .data = (void *)0}, + { .compatible = "ds1775", .data = (void *)1 }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_lm75_of_id); + static struct i2c_driver wf_lm75_driver = { .driver = { .name = "wf_lm75", + .of_match_table = wf_lm75_of_id, }, .probe = wf_lm75_probe, .remove = wf_lm75_remove, diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c index b03a33b803b7..d011899c0a8a 100644 --- a/drivers/macintosh/windfarm_lm87_sensor.c +++ b/drivers/macintosh/windfarm_lm87_sensor.c @@ -166,9 +166,16 @@ static const struct i2c_device_id wf_lm87_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_lm87_id); +static const struct of_device_id wf_lm87_of_id[] = { + { .compatible = "lm87cimt", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_lm87_of_id); + static struct i2c_driver wf_lm87_driver = { .driver = { .name = "wf_lm87", + .of_match_table = wf_lm87_of_id, }, .probe = wf_lm87_probe, .remove = wf_lm87_remove, diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c index e666cc020683..1e7b03d44ad9 100644 --- a/drivers/macintosh/windfarm_max6690_sensor.c +++ b/drivers/macintosh/windfarm_max6690_sensor.c @@ -120,9 +120,16 @@ static const struct i2c_device_id wf_max6690_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_max6690_id); +static const struct of_device_id wf_max6690_of_id[] = { + { .compatible = "max6690", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_max6690_of_id); + static struct i2c_driver wf_max6690_driver = { .driver = { .name = "wf_max6690", + .of_match_table = wf_max6690_of_id, }, .probe = wf_max6690_probe, .remove = wf_max6690_remove, diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index c84ec49c3741..cb75dc035616 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -341,9 +341,16 @@ static const struct i2c_device_id wf_sat_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_sat_id); +static const struct of_device_id wf_sat_of_id[] = { + { .compatible = "smu-sat", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_sat_of_id); + static struct i2c_driver wf_sat_driver = { .driver = { .name = "wf_smu_sat", + .of_match_table = wf_sat_of_id, }, .probe = wf_sat_probe, .remove = wf_sat_remove, diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 9198c1b480d9..78d1067e9e17 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -329,6 +329,9 @@ struct cached_dev { */ atomic_t has_dirty; +#define BCH_CACHE_READA_ALL 0 +#define BCH_CACHE_READA_META_ONLY 1 + unsigned int cache_readahead_policy; struct bch_ratelimit writeback_rate; struct delayed_work writeback_rate_update; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index c71365e7c1fa..a50dcfda656f 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -397,7 +397,8 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state); /* Bkey utility code */ -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ + (unsigned int)(i)->keys) static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx) { diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index be2a2a201603..6730820780b0 100644 --- 
a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -417,10 +417,15 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)

 /* Journalling */

+#define nr_to_fifo_front(p, front_p, mask)	(((p) - (front_p)) & (mask))
+
 static void btree_flush_write(struct cache_set *c)
 {
 	struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
-	unsigned int i, n;
+	unsigned int i, nr;
+	int ref_nr;
+	atomic_t *fifo_front_p, *now_fifo_front_p;
+	size_t mask;

 	if (c->journal.btree_flushing)
 		return;
@@ -433,12 +438,50 @@ static void btree_flush_write(struct cache_set *c)
 	c->journal.btree_flushing = true;
 	spin_unlock(&c->journal.flush_write_lock);

+	/* get the oldest journal entry and check its refcount */
+	spin_lock(&c->journal.lock);
+	fifo_front_p = &fifo_front(&c->journal.pin);
+	ref_nr = atomic_read(fifo_front_p);
+	if (ref_nr <= 0) {
+		/*
+		 * do nothing if no btree node references
+		 * the oldest journal entry
+		 */
+		spin_unlock(&c->journal.lock);
+		goto out;
+	}
+	spin_unlock(&c->journal.lock);
+
+	mask = c->journal.pin.mask;
+	nr = 0;
 	atomic_long_inc(&c->flush_write);
 	memset(btree_nodes, 0, sizeof(btree_nodes));
-	n = 0;

 	mutex_lock(&c->bucket_lock);
 	list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
+		/*
+		 * It is safe to get now_fifo_front_p without holding
+		 * c->journal.lock here, because we don't need to know
+		 * the exact value, only whether the front pointer of
+		 * c->journal.pin has changed.
+		 */
+		now_fifo_front_p = &fifo_front(&c->journal.pin);
+		/*
+		 * If the oldest journal entry is reclaimed and the front
+		 * pointer of c->journal.pin changes, it is unnecessary
+		 * to scan c->btree_cache any further; just quit the loop
+		 * and flush out what we already have.
+		 */
+		if (now_fifo_front_p != fifo_front_p)
+			break;
+		/*
+		 * quit this loop if all matching btree nodes are
+		 * scanned and recorded in btree_nodes[] already.
+		 */
+		ref_nr = atomic_read(fifo_front_p);
+		if (nr >= ref_nr)
+			break;
+
 		if (btree_node_journal_flush(b))
 			pr_err("BUG: flush_write bit should not be set here!");
@@ -454,17 +497,44 @@ static void btree_flush_write(struct cache_set *c)
 			continue;
 		}

+		/*
+		 * Only select the btree node which exactly references
+		 * the oldest journal entry.
+		 *
+		 * If the journal entry pointed to by fifo_front_p is
+		 * reclaimed in parallel, don't worry:
+		 * - the list_for_each_xxx loop will quit when checking
+		 *   the next now_fifo_front_p.
+		 * - If there are matched nodes recorded in btree_nodes[],
+		 *   they are clean now (this is why and how the oldest
+		 *   journal entry can be reclaimed). These selected nodes
+		 *   will be ignored and skipped in the following for-loop.
+		 */
+		if (nr_to_fifo_front(btree_current_write(b)->journal,
+				     fifo_front_p,
+				     mask) != 0) {
+			mutex_unlock(&b->write_lock);
+			continue;
+		}
+
 		set_btree_node_journal_flush(b);
 		mutex_unlock(&b->write_lock);

-		btree_nodes[n++] = b;
-		if (n == BTREE_FLUSH_NR)
+		btree_nodes[nr++] = b;
+		/*
+		 * To avoid holding c->bucket_lock for too long, only
+		 * scan for BTREE_FLUSH_NR matched btree nodes at most.
+		 * If more btree nodes reference the oldest journal
+		 * entry, try to flush them the next time
+		 * btree_flush_write() is called.
+		 */
+		if (nr == BTREE_FLUSH_NR)
 			break;
 	}
 	mutex_unlock(&c->bucket_lock);

-	for (i = 0; i < n; i++) {
+	for (i = 0; i < nr; i++) {
 		b = btree_nodes[i];
 		if (!b) {
 			pr_err("BUG: btree_nodes[%d] is NULL", i);
@@ -497,6 +567,7 @@ static void btree_flush_write(struct cache_set *c)
 		mutex_unlock(&b->write_lock);
 	}

+out:
 	spin_lock(&c->journal.flush_write_lock);
 	c->journal.btree_flushing = false;
 	spin_unlock(&c->journal.flush_write_lock);
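The nr_to_fifo_front() macro introduced in the journal.c hunk above measures how far an entry sits behind the FIFO front in a ring whose size is a power of two: masking the index difference makes the subtraction wrap correctly. A standalone illustration with made-up index values:

#include <linux/types.h>

/* Distance from front_p to p in a ring of (mask + 1) slots, where
 * mask + 1 is a power of two; wraparound falls out of the mask. */
#define nr_to_fifo_front(p, front_p, mask)	(((p) - (front_p)) & (mask))

static size_t fifo_distance_demo(void)
{
	size_t mask = 7;	/* ring of 8 slots */

	/* front at index 7, entry at index 1 after wrapping past the
	 * end of the ring: the distance is (1 - 7) & 7 == 2 */
	return nr_to_fifo_front((size_t)1, (size_t)7, mask);
}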
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 73478a91a342..820d8402a1dc 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -379,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 		goto skip;

 	/*
-	 * Flag for bypass if the IO is for read-ahead or background,
-	 * unless the read-ahead request is for metadata
+	 * If the bio is for read-ahead or background IO, whether it is
+	 * bypassed depends on the following situations:
+	 * - If the IO is for metadata, always cache it, no bypass.
+	 * - If the IO is not for metadata, check dc->cache_readahead_policy:
+	 *      BCH_CACHE_READA_ALL: cache it, no bypass
+	 *      BCH_CACHE_READA_META_ONLY: do not cache it, bypass
+	 * That is, read-ahead requests for metadata always get cached
 	 * (eg, for gfs2 or xfs).
 	 */
-	if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
-	    !(bio->bi_opf & (REQ_META|REQ_PRIO)))
-		goto skip;
+	if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+		if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+		    (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+			goto skip;
+	}

 	if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
 	    bio_sectors(bio) & (c->sb.block_size - 1)) {
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index ba1c93791d8d..503aafe188dc 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -109,9 +109,13 @@ int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
 void bch_cache_accounting_clear(struct cache_accounting *acc)
 {
-	memset(&acc->total.cache_hits,
-	       0,
-	       sizeof(struct cache_stats));
+	acc->total.cache_hits = 0;
+	acc->total.cache_misses = 0;
+	acc->total.cache_bypass_hits = 0;
+	acc->total.cache_bypass_misses = 0;
+	acc->total.cache_readaheads = 0;
+	acc->total.cache_miss_collisions = 0;
+	acc->total.sectors_bypassed = 0;
 }

 void bch_cache_accounting_destroy(struct cache_accounting *acc)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 77e9869345e7..3b3724285d90 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1275,6 +1275,9 @@ static void cached_dev_free(struct closure *cl)

 	mutex_unlock(&bch_register_lock);

+	if (dc->sb_bio.bi_inline_vecs[0].bv_page)
+		put_page(bio_first_page_all(&dc->sb_bio));
+
 	if (!IS_ERR_OR_NULL(dc->bdev))
 		blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

@@ -2372,29 +2375,35 @@ static bool bch_is_open(struct block_device *bdev)
 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 			       const char *buffer, size_t size)
 {
-	ssize_t ret = -EINVAL;
-	const char *err = "cannot allocate memory";
+	const char *err;
 	char *path = NULL;
-	struct cache_sb *sb = NULL;
+	struct cache_sb *sb;
 	struct block_device *bdev = NULL;
-	struct page *sb_page = NULL;
+	struct page *sb_page;
+	ssize_t ret;

+	ret = -EBUSY;
+	err = "failed to reference bcache module";
 	if (!try_module_get(THIS_MODULE))
-		return -EBUSY;
+		goto out;

 	/* For latest state of bcache_is_reboot */
 	smp_mb();
+	err = "bcache is in reboot";
 	if (bcache_is_reboot)
-		return -EBUSY;
+		goto out_module_put;

+	ret = -ENOMEM;
+	err = "cannot
allocate memory"; path = kstrndup(buffer, size, GFP_KERNEL); if (!path) - goto err; + goto out_module_put; sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL); if (!sb) - goto err; + goto out_free_path; + ret = -EINVAL; err = "failed to open device"; bdev = blkdev_get_by_path(strim(path), FMODE_READ|FMODE_WRITE|FMODE_EXCL, @@ -2411,57 +2420,69 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!IS_ERR(bdev)) bdput(bdev); if (attr == &ksysfs_register_quiet) - goto quiet_out; + goto done; } - goto err; + goto out_free_sb; } err = "failed to set blocksize"; if (set_blocksize(bdev, 4096)) - goto err_close; + goto out_blkdev_put; err = read_super(sb, bdev, &sb_page); if (err) - goto err_close; + goto out_blkdev_put; err = "failed to register device"; if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); if (!dc) - goto err_close; + goto out_put_sb_page; mutex_lock(&bch_register_lock); ret = register_bdev(sb, sb_page, bdev, dc); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ - if (ret < 0) - goto err; + if (ret < 0) { + bdev = NULL; + goto out_put_sb_page; + } } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) - goto err_close; + goto out_put_sb_page; /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(sb, sb_page, bdev, ca) != 0) - goto err; + if (register_cache(sb, sb_page, bdev, ca) != 0) { + bdev = NULL; + goto out_put_sb_page; + } } -quiet_out: - ret = size; -out: - if (sb_page) - put_page(sb_page); + + put_page(sb_page); +done: + kfree(sb); + kfree(path); + module_put(THIS_MODULE); + return size; + +out_put_sb_page: + put_page(sb_page); +out_blkdev_put: + if (bdev) + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); +out_free_sb: kfree(sb); +out_free_path: kfree(path); + path = NULL; +out_module_put: module_put(THIS_MODULE); +out: + pr_info("error %s: %s", path?path:"", err); return ret; - -err_close: - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -err: - pr_info("error %s: %s", path, err); - goto out; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 733e2ddf3c78..3470fae4eabc 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = { NULL }; +static const char * const bch_reada_cache_policies[] = { + "all", + "meta-only", + NULL +}; + /* Default is 0 ("auto") */ static const char * const bch_stop_on_failure_modes[] = { "auto", @@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us); rw_attribute(sequential_cutoff); rw_attribute(data_csum); rw_attribute(cache_mode); +rw_attribute(readahead_cache_policy); rw_attribute(stop_when_cache_set_failed); rw_attribute(writeback_metadata); rw_attribute(writeback_running); @@ -168,6 +175,11 @@ SHOW(__bch_cached_dev) bch_cache_modes, BDEV_CACHE_MODE(&dc->sb)); + if (attr == &sysfs_readahead_cache_policy) + return bch_snprint_string_list(buf, PAGE_SIZE, + bch_reada_cache_policies, + dc->cache_readahead_policy); + if (attr == &sysfs_stop_when_cache_set_failed) return bch_snprint_string_list(buf, PAGE_SIZE, bch_stop_on_failure_modes, @@ -353,6 +365,15 @@ STORE(__cached_dev) } } + if (attr == &sysfs_readahead_cache_policy) { + v = __sysfs_match_string(bch_reada_cache_policies, -1, buf); + if (v < 0) + return v; + + if ((unsigned int) v != dc->cache_readahead_policy) + dc->cache_readahead_policy = v; + } + if (attr == &sysfs_stop_when_cache_set_failed) { v = 
__sysfs_match_string(bch_stop_on_failure_modes, -1, buf); if (v < 0) @@ -467,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_data_csum, #endif &sysfs_cache_mode, + &sysfs_readahead_cache_policy, &sysfs_stop_when_cache_set_failed, &sysfs_writeback_metadata, &sysfs_writeback_running, diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index c82578af56a5..2ea0360108e1 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -20,8 +20,13 @@ struct dm_bio_details { struct gendisk *bi_disk; u8 bi_partno; + int __bi_remaining; unsigned long bi_flags; struct bvec_iter bi_iter; + bio_end_io_t *bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; +#endif }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) @@ -30,6 +35,11 @@ static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) bd->bi_partno = bio->bi_partno; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; + bd->__bi_remaining = atomic_read(&bio->__bi_remaining); + bd->bi_end_io = bio->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bd->bi_integrity = bio_integrity(bio); +#endif } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) @@ -38,6 +48,11 @@ static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) bio->bi_partno = bd->bi_partno; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; + atomic_set(&bio->__bi_remaining, bd->__bi_remaining); + bio->bi_end_io = bd->bi_end_io; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + bio->bi_integrity = bd->bi_integrity; +#endif } #endif diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2d32821b3a5b..f4be63671233 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2846,8 +2846,8 @@ static void cache_postsuspend(struct dm_target *ti) prevent_background_work(cache); BUG_ON(atomic_read(&cache->nr_io_migrations)); - cancel_delayed_work(&cache->waker); - flush_workqueue(cache->wq); + cancel_delayed_work_sync(&cache->waker); + drain_workqueue(cache->wq); WARN_ON(cache->tracker.in_flight); /* diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eb9782fc93fe..492bbe0584d9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -331,8 +331,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned bs = crypto_skcipher_blocksize(any_tfm(cc)); - int log = ilog2(bs); + unsigned bs; + int log; + + if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) + bs = crypto_aead_blocksize(any_tfm_aead(cc)); + else + bs = crypto_skcipher_blocksize(any_tfm(cc)); + log = ilog2(bs); /* we need to calculate how far we must shift the sector count * to get the cipher block count, we use this shift in _gen */ @@ -717,7 +723,7 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_wait wait; int err; - req = skcipher_request_alloc(any_tfm(cc), GFP_KERNEL | GFP_NOFS); + req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO); if (!req) return -ENOMEM; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index b225b3e445fa..a82a9c257744 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -6,6 +6,8 @@ * This file is released under the GPL. 
*/ +#include "dm-bio-record.h" + #include #include #include @@ -201,17 +203,19 @@ struct dm_integrity_c { __u8 log2_blocks_per_bitmap_bit; unsigned char mode; - int suspending; int failed; struct crypto_shash *internal_hash; + struct dm_target *ti; + /* these variables are locked with endio_wait.lock */ struct rb_root in_progress; struct list_head wait_list; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; + struct workqueue_struct *offload_wq; unsigned char commit_seq; commit_id_t commit_ids[N_COMMIT_IDS]; @@ -293,11 +297,7 @@ struct dm_integrity_io { struct completion *completion; - struct gendisk *orig_bi_disk; - u8 orig_bi_partno; - bio_end_io_t *orig_bi_end_io; - struct bio_integrity_payload *orig_bi_integrity; - struct bvec_iter orig_bi_iter; + struct dm_bio_details bio_details; }; struct journal_completion { @@ -1439,7 +1439,7 @@ static void dec_in_flight(struct dm_integrity_io *dio) dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } do_endio_flush(ic, dio); @@ -1450,14 +1450,9 @@ static void integrity_end_io(struct bio *bio) { struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); - bio->bi_iter = dio->orig_bi_iter; - bio->bi_disk = dio->orig_bi_disk; - bio->bi_partno = dio->orig_bi_partno; - if (dio->orig_bi_integrity) { - bio->bi_integrity = dio->orig_bi_integrity; + dm_bio_restore(&dio->bio_details, bio); + if (bio->bi_integrity) bio->bi_opf |= REQ_INTEGRITY; - } - bio->bi_end_io = dio->orig_bi_end_io; if (dio->completion) complete(dio->completion); @@ -1542,7 +1537,7 @@ static void integrity_metadata(struct work_struct *w) } } - __bio_for_each_segment(bv, bio, iter, dio->orig_bi_iter) { + __bio_for_each_segment(bv, bio, iter, dio->bio_details.bi_iter) { unsigned pos; char *mem, *checksums_ptr; @@ -1586,7 +1581,7 @@ static void integrity_metadata(struct work_struct *w) if (likely(checksums != checksums_onstack)) kfree(checksums); } else { - struct bio_integrity_payload *bip = dio->orig_bi_integrity; + struct bio_integrity_payload *bip = dio->bio_details.bi_integrity; if (bip) { struct bio_vec biv; @@ -1865,7 +1860,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (need_sync_io && from_map) { INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->metadata_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } @@ -2005,20 +2000,13 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map } else dio->completion = NULL; - dio->orig_bi_iter = bio->bi_iter; - - dio->orig_bi_disk = bio->bi_disk; - dio->orig_bi_partno = bio->bi_partno; + dm_bio_record(&dio->bio_details, bio); bio_set_dev(bio, ic->dev->bdev); - - dio->orig_bi_integrity = bio_integrity(bio); bio->bi_integrity = NULL; bio->bi_opf &= ~REQ_INTEGRITY; - - dio->orig_bi_end_io = bio->bi_end_io; bio->bi_end_io = integrity_end_io; - bio->bi_iter.bi_size = dio->range.n_sectors << SECTOR_SHIFT; + generic_make_request(bio); if (need_sync_io) { @@ -2315,7 +2303,7 @@ static void integrity_writer(struct work_struct *w) unsigned prev_free_sectors; /* the following test is not needed, but it tests the replay code */ - if (READ_ONCE(ic->suspending) && !ic->meta_dev) + if (unlikely(dm_suspended(ic->ti)) && !ic->meta_dev) return; spin_lock_irq(&ic->endio_wait.lock); @@ -2376,7 +2364,7 @@ static void integrity_recalc(struct 
work_struct *w) next_chunk: - if (unlikely(READ_ONCE(ic->suspending))) + if (unlikely(dm_suspended(ic->ti))) goto unlock_ret; range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); @@ -2501,7 +2489,7 @@ static void bitmap_block_work(struct work_struct *w) dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) { remove_range(ic, &dio->range); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); } else { block_bitmap_op(ic, ic->journal, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_SET); @@ -2524,7 +2512,7 @@ static void bitmap_block_work(struct work_struct *w) remove_range(ic, &dio->range); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); } queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval); @@ -2804,8 +2792,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti) del_timer_sync(&ic->autocommit_timer); - WRITE_ONCE(ic->suspending, 1); - if (ic->recalc_wq) drain_workqueue(ic->recalc_wq); @@ -2834,8 +2820,6 @@ static void dm_integrity_postsuspend(struct dm_target *ti) #endif } - WRITE_ONCE(ic->suspending, 0); - BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); ic->journal_uptodate = true; @@ -2888,17 +2872,24 @@ static void dm_integrity_resume(struct dm_target *ti) } else { replay_journal(ic); if (ic->mode == 'B') { - int mode; ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); - mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR; - block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode); - block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode); - block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode); + block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); + block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); + block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_CLEAR); + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) && + le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors) { + block_bitmap_op(ic, ic->journal, le64_to_cpu(ic->sb->recalc_sector), + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); + block_bitmap_op(ic, ic->recalc_bitmap, le64_to_cpu(ic->sb->recalc_sector), + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); + block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), + ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); + } rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); } @@ -2967,7 +2958,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" meta_device:%s", ic->meta_dev->name); if (ic->sectors_per_block != 1) DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); - if (ic->recalculate_flag) + if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) DMEMIT(" recalculate"); DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); @@ -3623,6 +3614,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) } 
ti->private = ic; ti->per_io_data_size = sizeof(struct dm_integrity_io); + ic->ti = ti; ic->in_progress = RB_ROOT; INIT_LIST_HEAD(&ic->wait_list); @@ -3836,6 +3828,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, + METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->offload_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); if (!ic->commit_wq) { ti->error = "Cannot allocate workqueue"; @@ -4140,6 +4140,8 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) destroy_workqueue(ic->wait_wq); + if (ic->offload_wq) + destroy_workqueue(ic->offload_wq); if (ic->commit_wq) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c412eaa975fc..9a18bef0a5ff 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -129,7 +129,9 @@ struct raid_dev { CTR_FLAG_RAID10_COPIES | \ CTR_FLAG_RAID10_FORMAT | \ CTR_FLAG_DELTA_DISKS | \ - CTR_FLAG_DATA_OFFSET) + CTR_FLAG_DATA_OFFSET | \ + CTR_FLAG_JOURNAL_DEV | \ + CTR_FLAG_JOURNAL_MODE) /* Valid options definitions per raid level... */ @@ -3001,7 +3003,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) { 1, 254, "Cannot understand number of raid devices parameters" } }; - /* Must have */ arg = dm_shift_arg(&as); if (!arg) { ti->error = "No arguments"; @@ -3508,8 +3509,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, unsigned long recovery; unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */ unsigned int sz = 0; - unsigned int rebuild_disks; - unsigned int write_mostly_params = 0; + unsigned int rebuild_writemostly_count = 0; sector_t progress, resync_max_sectors, resync_mismatches; enum sync_state state; struct raid_type *rt; @@ -3593,18 +3593,20 @@ static void raid_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: /* Report the table line string you would use to construct this raid set */ - /* Calculate raid parameter count */ - for (i = 0; i < rs->raid_disks; i++) - if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - write_mostly_params += 2; - rebuild_disks = memweight(rs->rebuild_disks, DISKS_ARRAY_ELEMS * sizeof(*rs->rebuild_disks)); - raid_param_cnt += rebuild_disks * 2 + - write_mostly_params + + /* + * Count the rebuild and writemostly argument pairs, then subtract + * one pair each for the rebuild and writemostly ctr flags, since + * those flags are already counted by the hweight sums added below. + */ + for (i = 0; i < rs->raid_disks; i++) { + rebuild_writemostly_count += (test_bit(i, (void *) rs->rebuild_disks) ? 2 : 0) + + (test_bit(WriteMostly, &rs->dev[i].rdev.flags) ? 2 : 0); + } + rebuild_writemostly_count -= (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) ? 2 : 0) + + (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags) ? 2 : 0); + /* Calculate the raid parameter count from those argument counts and the ctr flags set. */ + raid_param_cnt += rebuild_writemostly_count + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + - hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 + - (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) + - (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0); - + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2; /* Emit table line */ /* This has to be in the documented order for userspace!
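* (For instance, a raid1 set with a single rebuilding leg would emit a table line beginning "raid1 3 <chunk_sectors> rebuild 1": a raid_param_cnt of 3 covers the chunk size parameter plus the two-word rebuild pair, and that count must match the emitted arguments exactly.)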
*/ DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); @@ -3612,11 +3614,10 @@ static void raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC)); if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC)); - if (rebuild_disks) + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) for (i = 0; i < rs->raid_disks; i++) - if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks)) - DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), - rs->dev[i].rdev.raid_disk); + if (test_bit(i, (void *) rs->rebuild_disks)) + DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), i); if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP), mddev->bitmap_info.daemon_sleep); @@ -3626,7 +3627,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE), mddev->sync_speed_max); - if (write_mostly_params) + if (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags)) for (i = 0; i < rs->raid_disks; i++) if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY), @@ -4029,7 +4030,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 15, 0}, + .version = {1, 15, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b88d6d701f5b..4cd8868f8004 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -387,16 +387,15 @@ static int subtree_equal(void *context, const void *value1_le, const void *value * Variant that is used for in-core only changes or code that * shouldn't put the pool in service on its own (e.g. commit). */ -static inline void __pmd_write_lock(struct dm_pool_metadata *pmd) +static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd) __acquires(pmd->root_lock) { down_write(&pmd->root_lock); } -#define pmd_write_lock_in_core(pmd) __pmd_write_lock((pmd)) static inline void pmd_write_lock(struct dm_pool_metadata *pmd) { - __pmd_write_lock(pmd); + pmd_write_lock_in_core(pmd); if (unlikely(!pmd->in_service)) pmd->in_service = true; } @@ -831,6 +830,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) * We need to know if the thin_disk_superblock exceeds a 512-byte sector. */ BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); + BUG_ON(!rwsem_is_locked(&pmd->root_lock)); if (unlikely(!pmd->in_service)) return 0; @@ -953,12 +953,14 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } + pmd_write_lock_in_core(pmd); if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) { r = __commit_transaction(pmd); if (r < 0) DMWARN("%s: __commit_transaction() failed, error = %d", __func__, r); } + pmd_write_unlock(pmd); if (!pmd->fail_io) __destroy_persistent_data_objects(pmd); @@ -1841,7 +1843,7 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) * Care is taken to not have commit be what * triggers putting the thin-pool in-service. 
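* (pmd_write_lock_in_core() takes only pmd->root_lock, while pmd_write_lock() additionally marks the pool in-service, which is why the commit path sticks to the in-core variant.)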
*/ - __pmd_write_lock(pmd); + pmd_write_lock_in_core(pmd); if (pmd->fail_io) goto out; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 57626c27a54b..4fb6e89c8786 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -231,6 +231,7 @@ struct pool { struct dm_target *ti; /* Only set if a pool target is bound */ struct mapped_device *pool_md; + struct block_device *data_dev; struct block_device *md_dev; struct dm_pool_metadata *pmd; @@ -2933,6 +2934,7 @@ static struct kmem_cache *_new_mapping_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, + struct block_device *data_dev, unsigned long block_size, int read_only, char **error) { @@ -3040,6 +3042,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; pool->md_dev = metadata_dev; + pool->data_dev = data_dev; __pool_table_insert(pool); return pool; @@ -3081,6 +3084,7 @@ static void __pool_dec(struct pool *pool) static struct pool *__pool_find(struct mapped_device *pool_md, struct block_device *metadata_dev, + struct block_device *data_dev, unsigned long block_size, int read_only, char **error, int *created) { @@ -3091,19 +3095,23 @@ static struct pool *__pool_find(struct mapped_device *pool_md, *error = "metadata device already in use by a pool"; return ERR_PTR(-EBUSY); } + if (pool->data_dev != data_dev) { + *error = "data device already in use by a pool"; + return ERR_PTR(-EBUSY); + } __pool_inc(pool); } else { pool = __pool_table_lookup(pool_md); if (pool) { - if (pool->md_dev != metadata_dev) { + if (pool->md_dev != metadata_dev || pool->data_dev != data_dev) { *error = "different pool cannot replace a pool"; return ERR_PTR(-EINVAL); } __pool_inc(pool); } else { - pool = pool_create(pool_md, metadata_dev, block_size, read_only, error); + pool = pool_create(pool_md, metadata_dev, data_dev, block_size, read_only, error); *created = 1; } } @@ -3356,7 +3364,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out; } - pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, + pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, data_dev->bdev, block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created); if (IS_ERR(pool)) { r = PTR_ERR(pool); @@ -3408,10 +3416,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) if (r) goto out_flags_changed; - dm_pool_register_pre_commit_callback(pt->pool->pmd, - metadata_pre_commit_callback, - pt); - pt->callbacks.congested_fn = pool_is_congested; dm_table_add_target_callbacks(ti->table, &pt->callbacks); @@ -3574,6 +3578,9 @@ static int pool_preresume(struct dm_target *ti) if (r) return r; + dm_pool_register_pre_commit_callback(pool->pmd, + metadata_pre_commit_callback, pt); + r = maybe_resize_data_dev(ti, &need_commit1); if (r) return r; @@ -4099,7 +4106,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 21, 0}, + .version = {1, 22, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4476,7 +4483,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 21, 0}, + .version = {1, 22, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 7d727a72aa13..cd9e4c8a023a 100644 --- 
a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -442,7 +442,13 @@ static void writecache_notify_io(unsigned long error, void *context) complete(&endio->c); } -static void ssd_commit_flushed(struct dm_writecache *wc) +static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) +{ + wait_event(wc->bio_in_progress_wait[direction], + !atomic_read(&wc->bio_in_progress[direction])); +} + +static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { struct dm_io_region region; struct dm_io_request req; @@ -488,17 +494,20 @@ static void ssd_commit_flushed(struct dm_writecache *wc) writecache_notify_io(0, &endio); wait_for_completion_io(&endio.c); + if (wait_for_ios) + writecache_wait_for_ios(wc, WRITE); + writecache_disk_flush(wc, wc->ssd_dev); memset(wc->dirty_bitmap, 0, wc->dirty_bitmap_size); } -static void writecache_commit_flushed(struct dm_writecache *wc) +static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) { if (WC_MODE_PMEM(wc)) wmb(); else - ssd_commit_flushed(wc); + ssd_commit_flushed(wc, wait_for_ios); } static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) @@ -522,12 +531,6 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) writecache_error(wc, r, "error flushing metadata: %d", r); } -static void writecache_wait_for_ios(struct dm_writecache *wc, int direction) -{ - wait_event(wc->bio_in_progress_wait[direction], - !atomic_read(&wc->bio_in_progress[direction])); -} - #define WFE_RETURN_FOLLOWING 1 #define WFE_LOWEST_SEQ 2 @@ -622,6 +625,12 @@ static void writecache_add_to_freelist(struct dm_writecache *wc, struct wc_entry wc->freelist_size++; } +static inline void writecache_verify_watermark(struct dm_writecache *wc) +{ + if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) + queue_work(wc->writeback_wq, &wc->writeback_work); +} + static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc) { struct wc_entry *e; @@ -643,8 +652,8 @@ static struct wc_entry *writecache_pop_from_freelist(struct dm_writecache *wc) list_del(&e->lru); } wc->freelist_size--; - if (unlikely(wc->freelist_size + wc->writeback_size <= wc->freelist_high_watermark)) - queue_work(wc->writeback_wq, &wc->writeback_work); + + writecache_verify_watermark(wc); return e; } @@ -724,15 +733,12 @@ static void writecache_flush(struct dm_writecache *wc) e = e2; cond_resched(); } - writecache_commit_flushed(wc); - - if (!WC_MODE_PMEM(wc)) - writecache_wait_for_ios(wc, WRITE); + writecache_commit_flushed(wc, true); wc->seq_count++; pmem_assign(sb(wc)->seq_count, cpu_to_le64(wc->seq_count)); writecache_flush_region(wc, &sb(wc)->seq_count, sizeof sb(wc)->seq_count); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc->overwrote_committed = false; @@ -756,7 +762,7 @@ static void writecache_flush(struct dm_writecache *wc) } if (need_flush_after_free) - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); } static void writecache_flush_work(struct work_struct *work) @@ -809,7 +815,7 @@ static void writecache_discard(struct dm_writecache *wc, sector_t start, sector_ } if (discarded_something) - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); } static bool writecache_wait_for_writeback(struct dm_writecache *wc) @@ -838,7 +844,7 @@ static void writecache_suspend(struct dm_target *ti) } wc_unlock(wc); - flush_workqueue(wc->writeback_wq); + drain_workqueue(wc->writeback_wq); wc_lock(wc); if 
(flush_on_suspend) @@ -958,9 +964,11 @@ static void writecache_resume(struct dm_target *ti) if (need_flush) { writecache_flush_all_metadata(wc); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); } + writecache_verify_watermark(wc); + wc_unlock(wc); } @@ -1342,7 +1350,7 @@ static void __writecache_endio_pmem(struct dm_writecache *wc, struct list_head * wc->writeback_size--; n_walked++; if (unlikely(n_walked >= ENDIO_LATENCY)) { - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc_unlock(wc); wc_lock(wc); n_walked = 0; @@ -1423,7 +1431,7 @@ static int writecache_endio_thread(void *data) writecache_wait_for_ios(wc, READ); } - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); wc_unlock(wc); } @@ -1766,10 +1774,10 @@ static int init_memory(struct dm_writecache *wc) write_original_sector_seq_count(wc, &wc->entries[b], -1, -1); writecache_flush_all_metadata(wc); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); pmem_assign(sb(wc)->magic, cpu_to_le32(MEMORY_SUPERBLOCK_MAGIC)); writecache_flush_region(wc, &sb(wc)->magic, sizeof sb(wc)->magic); - writecache_commit_flushed(wc); + writecache_commit_flushed(wc, false); return 0; } diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 22b3cb0050a7..516c7b671d25 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -134,6 +134,7 @@ struct dmz_metadata { sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; + unsigned int zone_bits_per_mblk; unsigned int nr_bitmap_blocks; unsigned int nr_map_blocks; @@ -1161,7 +1162,10 @@ static int dmz_init_zones(struct dmz_metadata *zmd) /* Init */ zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; - zmd->zone_nr_bitmap_blocks = zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT; + zmd->zone_nr_bitmap_blocks = + max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); + zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, + DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); @@ -1956,7 +1960,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, dmz_release_mblock(zmd, to_mblk); dmz_release_mblock(zmd, from_mblk); - chunk_block += DMZ_BLOCK_SIZE_BITS; + chunk_block += zmd->zone_bits_per_mblk; } to_zone->weight = from_zone->weight; @@ -2017,7 +2021,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Set bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits); if (count) { @@ -2096,7 +2100,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Clear bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_clear_bits((unsigned long *)mblk->data, bit, nr_bits); @@ -2156,6 +2160,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, { struct dmz_mblock *mblk; unsigned int bit, set_bit, nr_bits; + unsigned int zone_bits = zmd->zone_bits_per_mblk; unsigned long *bitmap; int n = 0; @@ -2170,15 +2175,15 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, /* Get offset */ bitmap = (unsigned long *) mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = 
min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zone_bits - bit); if (set) - set_bit = find_next_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_bit(bitmap, zone_bits, bit); else - set_bit = find_next_zero_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_zero_bit(bitmap, zone_bits, bit); dmz_release_mblock(zmd, mblk); n += set_bit - bit; - if (set_bit < DMZ_BLOCK_SIZE_BITS) + if (set_bit < zone_bits) break; nr_blocks -= nr_bits; @@ -2281,7 +2286,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) /* Count bits in this block */ bitmap = mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); n += dmz_count_bits(bitmap, bit, nr_bits); dmz_release_mblock(zmd, mblk); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 70a1063161c0..b1e64cd31647 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -533,8 +533,9 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) /* Get the BIO chunk work. If one is not active yet, create one */ cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk); - if (!cw) { - + if (cw) { + dmz_get_chunk_work(cw); + } else { /* Create a new chunk work */ cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO); if (unlikely(!cw)) { @@ -543,7 +544,7 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) } INIT_WORK(&cw->work, dmz_chunk_work); - refcount_set(&cw->refcount, 0); + refcount_set(&cw->refcount, 1); cw->target = dmz; cw->chunk = chunk; bio_list_init(&cw->bio_list); @@ -556,7 +557,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio) } bio_list_add(&cw->bio_list, bio); - dmz_get_chunk_work(cw); dmz_reclaim_bio_acc(dmz->reclaim); if (queue_work(dmz->chunk_wq, &cw->work)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e8f9661a10a1..0413018c8305 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1788,7 +1788,8 @@ static int dm_any_congested(void *congested_data, int bdi_bits) * With request-based DM we only need to check the * top-level queue for congestion. */ - r = md->queue->backing_dev_info->wb.state & bdi_bits; + struct backing_dev_info *bdi = md->queue->backing_dev_info; + r = bdi->wb.congested->state & bdi_bits; } else { map = dm_get_live_table_fast(md); if (map) @@ -1854,14 +1855,6 @@ static const struct dax_operations dm_dax_ops; static void dm_wq_work(struct work_struct *work); -static void dm_init_normal_md_queue(struct mapped_device *md) -{ - /* - * Initialize aspects of queue that aren't relevant for blk-mq - */ - md->queue->backing_dev_info->congested_fn = dm_any_congested; -} - static void cleanup_mapped_device(struct mapped_device *md) { if (md->wq) @@ -1949,7 +1942,12 @@ static struct mapped_device *alloc_dev(int minor) if (!md->queue) goto bad; md->queue->queuedata = md; - md->queue->backing_dev_info->congested_data = md; + /* + * Default to a bio-based ->make_request_fn until the DM table + * is loaded and md->type is established. If a request-based + * table is loaded, blk-mq will override this accordingly.
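+ * (In that case dm_setup_md_queue() initializes blk-mq for the DM_TYPE_REQUEST_BASED case below, and only the congested_fn wiring in dm_init_congested_fn() remains common to both paths.)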
+ */ + blk_queue_make_request(md->queue, dm_make_request); md->disk = alloc_disk_node(1, md->numa_node_id); if (!md->disk) @@ -2243,6 +2241,12 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); +static void dm_init_congested_fn(struct mapped_device *md) +{ + md->queue->backing_dev_info->congested_data = md; + md->queue->backing_dev_info->congested_fn = dm_any_congested; +} + /* * Setup the DM device's queue based on md's type */ @@ -2259,12 +2263,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) DMERR("Cannot initialize queue for request-based dm-mq mapped device"); return r; } + dm_init_congested_fn(md); break; case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: case DM_TYPE_NVME_BIO_BASED: - dm_init_normal_md_queue(md); - blk_queue_make_request(md->queue, dm_make_request); + dm_init_congested_fn(md); break; case DM_TYPE_NONE: WARN_ON_ONCE(true); @@ -2363,6 +2367,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait) map = dm_get_live_table(md, &srcu_idx); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); + set_bit(DMF_SUSPENDED, &md->flags); dm_table_postsuspend_targets(map); } /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index bd68f6fef694..d8b4125e338c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -380,6 +380,33 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, return -ENOSPC; } +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *b) +{ + int r; + uint32_t count; + + do { + r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b); + if (r) + break; + + /* double check this block wasn't used in the old transaction */ + if (*b >= old_ll->nr_blocks) + count = 0; + else { + r = sm_ll_lookup(old_ll, *b, &count); + if (r) + break; + + if (count) + begin = *b + 1; + } + } while (count); + + return r; +} + static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, int (*mutator)(void *context, uint32_t old, uint32_t *new), void *context, enum allocation_event *ev) diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index b3078d5eda0c..8de63ce39bdd 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -109,6 +109,8 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 32adf6b4a9c7..bf4c5e2ccb6f 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -167,8 +167,10 @@ static int sm_disk_new_block(struct 
dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - /* FIXME: we should loop round a couple of times */ - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 25328582cc48..9e3c64ec2026 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -448,7 +448,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/media/i2c/adv748x/adv748x.h b/drivers/media/i2c/adv748x/adv748x.h index 5042f9e94aee..fccb388ce179 100644 --- a/drivers/media/i2c/adv748x/adv748x.h +++ b/drivers/media/i2c/adv748x/adv748x.h @@ -394,10 +394,10 @@ int adv748x_write_block(struct adv748x_state *state, int client_page, #define io_read(s, r) adv748x_read(s, ADV748X_PAGE_IO, r) #define io_write(s, r, v) adv748x_write(s, ADV748X_PAGE_IO, r, v) -#define io_clrset(s, r, m, v) io_write(s, r, (io_read(s, r) & ~m) | v) +#define io_clrset(s, r, m, v) io_write(s, r, (io_read(s, r) & ~(m)) | (v)) #define hdmi_read(s, r) adv748x_read(s, ADV748X_PAGE_HDMI, r) -#define hdmi_read16(s, r, m) (((hdmi_read(s, r) << 8) | hdmi_read(s, r+1)) & m) +#define hdmi_read16(s, r, m) (((hdmi_read(s, r) << 8) | hdmi_read(s, (r)+1)) & (m)) #define hdmi_write(s, r, v) adv748x_write(s, ADV748X_PAGE_HDMI, r, v) #define repeater_read(s, r) adv748x_read(s, ADV748X_PAGE_REPEATER, r) @@ -405,11 +405,11 @@ int adv748x_write_block(struct adv748x_state *state, int client_page, #define sdp_read(s, r) adv748x_read(s, ADV748X_PAGE_SDP, r) #define sdp_write(s, r, v) adv748x_write(s, ADV748X_PAGE_SDP, r, v) -#define sdp_clrset(s, r, m, v) sdp_write(s, r, (sdp_read(s, r) & ~m) | v) +#define sdp_clrset(s, r, m, v) sdp_write(s, r, (sdp_read(s, r) & ~(m)) | (v)) #define cp_read(s, r) adv748x_read(s, ADV748X_PAGE_CP, r) #define cp_write(s, r, v) adv748x_write(s, ADV748X_PAGE_CP, r, v) -#define cp_clrset(s, r, m, v) cp_write(s, r, (cp_read(s, r) & ~m) | v) +#define cp_clrset(s, r, m, v) cp_write(s, r, (cp_read(s, r) & ~(m)) | (v)) #define tx_read(t, r) adv748x_read(t->state, t->page, r) #define tx_write(t, r, v) adv748x_write(t->state, t->page, r, v) diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c index 4b9b98cf6674..5bd3ae82992f 100644 --- a/drivers/media/i2c/mt9v032.c +++ b/drivers/media/i2c/mt9v032.c @@ -428,10 +428,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_mbus_code_enum *code) { + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + if (code->index > 0) return -EINVAL; - code->code = MEDIA_BUS_FMT_SGRBG10_1X10; + code->code = mt9v032->format.code; return 0; } @@ -439,7 +441,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct 
v4l2_subdev_frame_size_enum *fse) { - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10) + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + + if (fse->index >= 3) + return -EINVAL; + if (mt9v032->format.code != fse->code) return -EINVAL; fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index); diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c index 5e495c833d32..bb968e764f31 100644 --- a/drivers/media/i2c/ov5640.c +++ b/drivers/media/i2c/ov5640.c @@ -874,7 +874,7 @@ static unsigned long ov5640_calc_sys_clk(struct ov5640_dev *sensor, * We have reached the maximum allowed PLL1 output, * increase sysdiv. */ - if (!rate) + if (!_rate) break; /* diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c index 7c429ce98bae..668770e9f609 100644 --- a/drivers/media/mc/mc-entity.c +++ b/drivers/media/mc/mc-entity.c @@ -639,9 +639,9 @@ int media_get_pad_index(struct media_entity *entity, bool is_sink, return -EINVAL; for (i = 0; i < entity->num_pads; i++) { - if (entity->pads[i].flags == MEDIA_PAD_FL_SINK) + if (entity->pads[i].flags & MEDIA_PAD_FL_SINK) pad_is_sink = true; - else if (entity->pads[i].flags == MEDIA_PAD_FL_SOURCE) + else if (entity->pads[i].flags & MEDIA_PAD_FL_SOURCE) pad_is_sink = false; else continue; /* This is an error! */ diff --git a/drivers/media/pci/cx23885/cx23885-cards.c b/drivers/media/pci/cx23885/cx23885-cards.c index 8644205d3cd3..8e5a2c580821 100644 --- a/drivers/media/pci/cx23885/cx23885-cards.c +++ b/drivers/media/pci/cx23885/cx23885-cards.c @@ -801,6 +801,25 @@ struct cx23885_board cx23885_boards[] = { .name = "Hauppauge WinTV-Starburst2", .portb = CX23885_MPEG_DVB, }, + [CX23885_BOARD_AVERMEDIA_CE310B] = { + .name = "AVerMedia CE310B", + .porta = CX23885_ANALOG_VIDEO, + .force_bff = 1, + .input = {{ + .type = CX23885_VMUX_COMPOSITE1, + .vmux = CX25840_VIN1_CH1 | + CX25840_NONE_CH2 | + CX25840_NONE0_CH3, + .amux = CX25840_AUDIO7, + }, { + .type = CX23885_VMUX_SVIDEO, + .vmux = CX25840_VIN8_CH1 | + CX25840_NONE_CH2 | + CX25840_VIN7_CH3 | + CX25840_SVIDEO_ON, + .amux = CX25840_AUDIO7, + } }, + }, }; const unsigned int cx23885_bcount = ARRAY_SIZE(cx23885_boards); @@ -1124,6 +1143,10 @@ struct cx23885_subid cx23885_subids[] = { .subvendor = 0x0070, .subdevice = 0xf02a, .card = CX23885_BOARD_HAUPPAUGE_STARBURST2, + }, { + .subvendor = 0x1461, + .subdevice = 0x3100, + .card = CX23885_BOARD_AVERMEDIA_CE310B, }, }; const unsigned int cx23885_idcount = ARRAY_SIZE(cx23885_subids); @@ -2348,6 +2371,7 @@ void cx23885_card_setup(struct cx23885_dev *dev) case CX23885_BOARD_DVBSKY_T982: case CX23885_BOARD_VIEWCAST_260E: case CX23885_BOARD_VIEWCAST_460E: + case CX23885_BOARD_AVERMEDIA_CE310B: dev->sd_cx25840 = v4l2_i2c_new_subdev(&dev->v4l2_dev, &dev->i2c_bus[2].i2c_adap, "cx25840", 0x88 >> 1, NULL); diff --git a/drivers/media/pci/cx23885/cx23885-video.c b/drivers/media/pci/cx23885/cx23885-video.c index 8098b15493de..7fc408ee4934 100644 --- a/drivers/media/pci/cx23885/cx23885-video.c +++ b/drivers/media/pci/cx23885/cx23885-video.c @@ -257,7 +257,8 @@ static int cx23885_video_mux(struct cx23885_dev *dev, unsigned int input) (dev->board == CX23885_BOARD_MYGICA_X8507) || (dev->board == CX23885_BOARD_AVERMEDIA_HC81R) || (dev->board == CX23885_BOARD_VIEWCAST_260E) || - (dev->board == CX23885_BOARD_VIEWCAST_460E)) { + (dev->board == CX23885_BOARD_VIEWCAST_460E) || + (dev->board == CX23885_BOARD_AVERMEDIA_CE310B)) { /* Configure audio routing */ v4l2_subdev_call(dev->sd_cx25840, audio, s_routing, INPUT(input)->amux, 0, 0); diff --git 
a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h index a95a2e4c6a0d..c472498e57c4 100644 --- a/drivers/media/pci/cx23885/cx23885.h +++ b/drivers/media/pci/cx23885/cx23885.h @@ -101,6 +101,7 @@ #define CX23885_BOARD_HAUPPAUGE_STARBURST2 59 #define CX23885_BOARD_HAUPPAUGE_QUADHD_DVB_885 60 #define CX23885_BOARD_HAUPPAUGE_QUADHD_ATSC_885 61 +#define CX23885_BOARD_AVERMEDIA_CE310B 62 #define GPIO_0 0x00000001 #define GPIO_1 0x00000002 diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index 4372abbb5950..a74e9fd65238 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -14,8 +14,8 @@ #define MAX_SRC_WIDTH 2048 /* Reset & boot poll config */ -#define POLL_RST_MAX 50 -#define POLL_RST_DELAY_MS 20 +#define POLL_RST_MAX 500 +#define POLL_RST_DELAY_MS 2 enum bdisp_target_plan { BDISP_RGB, @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp) for (i = 0; i < POLL_RST_MAX; i++) { if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE) break; - msleep(POLL_RST_DELAY_MS); + udelay(POLL_RST_DELAY_MS * 1000); } if (i == POLL_RST_MAX) dev_err(bdisp->dev, "Reset timeout\n"); diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c index f36dc6258900..b8b07c1de2a8 100644 --- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c +++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -155,6 +156,27 @@ static int sun4i_csi_probe(struct platform_device *pdev) subdev = &csi->subdev; vdev = &csi->vdev; + /* + * On Allwinner SoCs, some high memory bandwidth devices do DMA + * directly over the memory bus (called MBUS), instead of the + * system bus. The memory bus has a different addressing scheme + * without the DRAM starting offset. + * + * In some cases this can be described by an interconnect in + * the device tree. In other cases where the hardware is not + * fully understood and the interconnect is left out of the + * device tree, fall back to a default offset. 
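+ * (dma_pfn_offset expresses, in pages, the gap between CPU physical and bus addresses; using PHYS_PFN_OFFSET here assumes DRAM starts at the platform's physical base, so MBUS addresses effectively begin at zero.)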
+ */ + if (of_find_property(csi->dev->of_node, "interconnects", NULL)) { + ret = of_dma_configure(csi->dev, csi->dev->of_node, true); + if (ret) + return ret; + } else { +#ifdef PHYS_PFN_OFFSET + csi->dev->dma_pfn_offset = PHYS_PFN_OFFSET; +#endif + } + csi->mdev.dev = csi->dev; strscpy(csi->mdev.model, "Allwinner Video Capture Device", sizeof(csi->mdev.model)); diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.h b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.h index 001c8bde006c..88d39b3554c4 100644 --- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.h +++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.h @@ -22,8 +22,8 @@ #define CSI_CFG_INPUT_FMT(fmt) ((fmt) << 20) #define CSI_CFG_OUTPUT_FMT(fmt) ((fmt) << 16) #define CSI_CFG_YUV_DATA_SEQ(seq) ((seq) << 8) -#define CSI_CFG_VSYNC_POL(pol) ((pol) << 2) -#define CSI_CFG_HSYNC_POL(pol) ((pol) << 1) +#define CSI_CFG_VREF_POL(pol) ((pol) << 2) +#define CSI_CFG_HREF_POL(pol) ((pol) << 1) #define CSI_CFG_PCLK_POL(pol) ((pol) << 0) #define CSI_CPT_CTRL_REG 0x08 diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_dma.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_dma.c index d6979e11a67b..78fa1c535ac6 100644 --- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_dma.c +++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_dma.c @@ -228,7 +228,7 @@ static int sun4i_csi_start_streaming(struct vb2_queue *vq, unsigned int count) struct sun4i_csi *csi = vb2_get_drv_priv(vq); struct v4l2_fwnode_bus_parallel *bus = &csi->bus; const struct sun4i_csi_format *csi_fmt; - unsigned long hsync_pol, pclk_pol, vsync_pol; + unsigned long href_pol, pclk_pol, vref_pol; unsigned long flags; unsigned int i; int ret; @@ -278,13 +278,21 @@ static int sun4i_csi_start_streaming(struct vb2_queue *vq, unsigned int count) writel(CSI_WIN_CTRL_H_ACTIVE(csi->fmt.height), csi->regs + CSI_WIN_CTRL_H_REG); - hsync_pol = !!(bus->flags & V4L2_MBUS_HSYNC_ACTIVE_HIGH); - pclk_pol = !!(bus->flags & V4L2_MBUS_DATA_ACTIVE_HIGH); - vsync_pol = !!(bus->flags & V4L2_MBUS_VSYNC_ACTIVE_HIGH); + /* + * This hardware uses [HV]REF instead of [HV]SYNC. Based on the + * provided timing diagrams in the manual, positive polarity + * equals active high [HV]REF. + * + * When the back porch is 0, [HV]REF is more or less equivalent + * to [HV]SYNC inverted. 
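+ * (Hence the mapping below: a V4L2_MBUS_HSYNC_ACTIVE_LOW bus flag yields href_pol == 1, i.e. an active-high HREF, and similarly for VREF; the pixel clock bit follows V4L2_MBUS_PCLK_SAMPLE_RISING directly.)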
+ */ + href_pol = !!(bus->flags & V4L2_MBUS_HSYNC_ACTIVE_LOW); + vref_pol = !!(bus->flags & V4L2_MBUS_VSYNC_ACTIVE_LOW); + pclk_pol = !!(bus->flags & V4L2_MBUS_PCLK_SAMPLE_RISING); writel(CSI_CFG_INPUT_FMT(csi_fmt->input) | CSI_CFG_OUTPUT_FMT(csi_fmt->output) | - CSI_CFG_VSYNC_POL(vsync_pol) | - CSI_CFG_HSYNC_POL(hsync_pol) | + CSI_CFG_VREF_POL(vref_pol) | + CSI_CFG_HREF_POL(href_pol) | CSI_CFG_PCLK_POL(pclk_pol), csi->regs + CSI_CFG_REG); diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c index 3c93d9232c3c..b6e39fbd8ad5 100644 --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c @@ -27,17 +27,17 @@ static const struct v4l2_fwht_pixfmt_info v4l2_fwht_pixfmts[] = { { V4L2_PIX_FMT_BGR24, 3, 3, 1, 3, 3, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_RGB24, 3, 3, 1, 3, 3, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_HSV24, 3, 3, 1, 3, 3, 1, 1, 3, 1, FWHT_FL_PIXENC_HSV}, - { V4L2_PIX_FMT_BGR32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_XBGR32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_BGR32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_XBGR32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_ABGR32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_RGB32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_XRGB32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_RGB32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_XRGB32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_ARGB32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_BGRX32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_BGRX32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_BGRA32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_RGBX32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_RGB}, + { V4L2_PIX_FMT_RGBX32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, { V4L2_PIX_FMT_RGBA32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_RGB}, - { V4L2_PIX_FMT_HSV32, 4, 4, 1, 4, 4, 1, 1, 3, 1, FWHT_FL_PIXENC_HSV}, + { V4L2_PIX_FMT_HSV32, 4, 4, 1, 4, 4, 1, 1, 4, 1, FWHT_FL_PIXENC_HSV}, { V4L2_PIX_FMT_GREY, 1, 1, 1, 1, 0, 1, 1, 1, 1, FWHT_FL_PIXENC_RGB}, }; @@ -175,22 +175,14 @@ static int prepare_raw_frame(struct fwht_raw_frame *rf, case V4L2_PIX_FMT_RGB32: case V4L2_PIX_FMT_XRGB32: case V4L2_PIX_FMT_HSV32: - rf->cr = rf->luma + 1; - rf->cb = rf->cr + 2; - rf->luma += 2; - break; - case V4L2_PIX_FMT_BGR32: - case V4L2_PIX_FMT_XBGR32: - rf->cb = rf->luma; - rf->cr = rf->cb + 2; - rf->luma++; - break; case V4L2_PIX_FMT_ARGB32: rf->alpha = rf->luma; rf->cr = rf->luma + 1; rf->cb = rf->cr + 2; rf->luma += 2; break; + case V4L2_PIX_FMT_BGR32: + case V4L2_PIX_FMT_XBGR32: case V4L2_PIX_FMT_ABGR32: rf->cb = rf->luma; rf->cr = rf->cb + 2; @@ -198,10 +190,6 @@ static int prepare_raw_frame(struct fwht_raw_frame *rf, rf->alpha = rf->cr + 1; break; case V4L2_PIX_FMT_BGRX32: - rf->cb = rf->luma + 1; - rf->cr = rf->cb + 2; - rf->luma += 2; - break; case V4L2_PIX_FMT_BGRA32: rf->alpha = rf->luma; rf->cb = rf->luma + 1; @@ -209,10 +197,6 @@ static int prepare_raw_frame(struct fwht_raw_frame *rf, rf->luma += 2; break; case V4L2_PIX_FMT_RGBX32: - rf->cr = rf->luma; - rf->cb = rf->cr + 2; - rf->luma++; - break; case V4L2_PIX_FMT_RGBA32: rf->alpha = rf->luma + 3; rf->cr = rf->luma; diff --git a/drivers/media/rc/iguanair.c 
b/drivers/media/rc/iguanair.c index 872d6441e512..a7deca1fefb7 100644 --- a/drivers/media/rc/iguanair.c +++ b/drivers/media/rc/iguanair.c @@ -413,7 +413,7 @@ static int iguanair_probe(struct usb_interface *intf, int ret, pipein, pipeout; struct usb_host_interface *idesc; - idesc = intf->altsetting; + idesc = intf->cur_altsetting; if (idesc->desc.bNumEndpoints < 2) return -ENODEV; diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 7741151606ef..6f80c251f641 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1891,23 +1891,28 @@ int rc_register_device(struct rc_dev *dev) dev->registered = true; - if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { - rc = rc_setup_rx_device(dev); - if (rc) - goto out_dev; - } - - /* Ensure that the lirc kfifo is setup before we start the thread */ + /* + * Once the input device is registered in rc_setup_rx_device(), + * userspace can open it and rc_open() will be called as a result. + * That allows driver code to submit keycodes with rc_keydown(), + * so lirc must be registered first. + */ if (dev->allowed_protocols != RC_PROTO_BIT_CEC) { rc = ir_lirc_register(dev); if (rc < 0) - goto out_rx; + goto out_dev; + } + + if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { + rc = rc_setup_rx_device(dev); + if (rc) + goto out_lirc; } if (dev->driver_type == RC_DRIVER_IR_RAW) { rc = ir_raw_event_register(dev); if (rc < 0) - goto out_lirc; + goto out_rx; } dev_dbg(&dev->dev, "Registered rc%u (driver: %s)\n", dev->minor, @@ -1915,11 +1920,11 @@ int rc_register_device(struct rc_dev *dev) return 0; +out_rx: + rc_free_rx_device(dev); out_lirc: if (dev->allowed_protocols != RC_PROTO_BIT_CEC) ir_lirc_unregister(dev); -out_rx: - rc_free_rx_device(dev); out_dev: device_del(&dev->dev); out_rx_free: diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index ac93e88d7038..89b4b5d84cdf 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -554,7 +554,7 @@ static int af9005_boot_packet(struct usb_device *udev, int type, u8 *reply, u8 *buf, int size) { u16 checksum; - int act_len, i, ret; + int act_len = 0, i, ret; memset(buf, 0, size); buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff); diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c index dd5bb230cec1..99a39339d45d 100644 --- a/drivers/media/usb/dvb-usb/digitv.c +++ b/drivers/media/usb/dvb-usb/digitv.c @@ -230,18 +230,22 @@ static struct rc_map_table rc_map_digitv_table[] = { static int digitv_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { - int i; + int ret, i; u8 key[5]; u8 b[4] = { 0 }; *event = 0; *state = REMOTE_NO_KEY_PRESSED; - digitv_ctrl_msg(d,USB_READ_REMOTE,0,NULL,0,&key[1],4); + ret = digitv_ctrl_msg(d, USB_READ_REMOTE, 0, NULL, 0, &key[1], 4); + if (ret) + return ret; /* Tell the device we've read the remote. Not sure how necessary this is, but the Nebula SDK does it.
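* (With the return checks added in this function, a failed control transfer now propagates its error to the RC polling core instead of a stale key buffer being parsed and a key press simulated.)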
*/ - digitv_ctrl_msg(d,USB_WRITE_REMOTE,0,b,4,NULL,0); + ret = digitv_ctrl_msg(d, USB_WRITE_REMOTE, 0, b, 4, NULL, 0); + if (ret) + return ret; /* if something is inside the buffer, simulate key press */ if (key[1] != 0) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-urb.c b/drivers/media/usb/dvb-usb/dvb-usb-urb.c index c1b4e94a37f8..2aabf90d8697 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-urb.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-urb.c @@ -12,7 +12,7 @@ int dvb_usb_generic_rw(struct dvb_usb_device *d, u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen, int delay_ms) { - int actlen,ret = -ENOMEM; + int actlen = 0, ret = -ENOMEM; if (!d || wbuf == NULL || wlen == 0) return -EINVAL; diff --git a/drivers/media/usb/dvb-usb/vp7045.c b/drivers/media/usb/dvb-usb/vp7045.c index 80c1cf05384b..2baf57216d19 100644 --- a/drivers/media/usb/dvb-usb/vp7045.c +++ b/drivers/media/usb/dvb-usb/vp7045.c @@ -96,10 +96,14 @@ static int vp7045_power_ctrl(struct dvb_usb_device *d, int onoff) static int vp7045_rc_query(struct dvb_usb_device *d) { + int ret; u8 key; - vp7045_usb_op(d,RC_VAL_READ,NULL,0,&key,1,20); - deb_rc("remote query key: %x %d\n",key,key); + ret = vp7045_usb_op(d, RC_VAL_READ, NULL, 0, &key, 1, 20); + if (ret) + return ret; + + deb_rc("remote query key: %x\n", key); if (key != 0x44) { /* @@ -115,15 +119,18 @@ static int vp7045_rc_query(struct dvb_usb_device *d) static int vp7045_read_eeprom(struct dvb_usb_device *d,u8 *buf, int len, int offset) { - int i = 0; - u8 v,br[2]; + int i, ret; + u8 v, br[2]; for (i=0; i < len; i++) { v = offset + i; - vp7045_usb_op(d,GET_EE_VALUE,&v,1,br,2,5); + ret = vp7045_usb_op(d, GET_EE_VALUE, &v, 1, br, 2, 5); + if (ret) + return ret; + buf[i] = br[1]; } - deb_info("VP7045 EEPROM read (offs: %d, len: %d) : ",offset, i); - debug_dump(buf,i,deb_info); + deb_info("VP7045 EEPROM read (offs: %d, len: %d) : ", offset, i); + debug_dump(buf, i, deb_info); return 0; } diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c index 4add2b12d330..c1b307bbe540 100644 --- a/drivers/media/usb/gspca/gspca.c +++ b/drivers/media/usb/gspca/gspca.c @@ -1461,7 +1461,7 @@ int gspca_dev_probe2(struct usb_interface *intf, pr_err("couldn't kzalloc gspca struct\n"); return -ENOMEM; } - gspca_dev->usb_buf = kmalloc(USB_BUF_SZ, GFP_KERNEL); + gspca_dev->usb_buf = kzalloc(USB_BUF_SZ, GFP_KERNEL); if (!gspca_dev->usb_buf) { pr_err("out of memory\n"); ret = -ENOMEM; diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 428235ca2635..99883550375e 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -497,6 +497,22 @@ static int uvc_parse_format(struct uvc_device *dev, } } + /* Some devices report bpp that doesn't match the format. 
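+ * (Worked example, assuming the v4l2_format_info() entry for NV12: bpp[] = {1, 2}, hdiv = vdiv = 2, so div = 4, n = 1 * 4 + 2 = 6, and the corrected format->bpp = DIV_ROUND_UP(8 * 6, 4) = 12 bits per pixel, as expected for NV12.)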
*/ + if (dev->quirks & UVC_QUIRK_FORCE_BPP) { + const struct v4l2_format_info *info = + v4l2_format_info(format->fcc); + + if (info) { + unsigned int div = info->hdiv * info->vdiv; + + n = info->bpp[0] * div; + for (i = 1; i < info->comp_planes; i++) + n += info->bpp[i]; + + format->bpp = DIV_ROUND_UP(8 * n, div); + } + } + if (buffer[2] == UVC_VS_FORMAT_UNCOMPRESSED) { ftype = UVC_VS_FRAME_UNCOMPRESSED; } else { @@ -1493,6 +1509,11 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, break; if (forward == prev) continue; + if (forward->chain.next || forward->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", forward->id); + return -EINVAL; + } switch (UVC_ENTITY_TYPE(forward)) { case UVC_VC_EXTENSION_UNIT: @@ -1574,6 +1595,13 @@ static int uvc_scan_chain_backward(struct uvc_video_chain *chain, return -1; } + if (term->chain.next || term->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", + term->id); + return -EINVAL; + } + if (uvc_trace_param & UVC_TRACE_PROBE) printk(KERN_CONT " %d", term->id); @@ -2862,6 +2890,15 @@ static const struct usb_device_id uvc_ids[] = { .bInterfaceSubClass = 1, .bInterfaceProtocol = 0, .driver_info = (kernel_ulong_t)&uvc_quirk_force_y8 }, + /* GEO Semiconductor GC6500 */ + { .match_flags = USB_DEVICE_ID_MATCH_DEVICE + | USB_DEVICE_ID_MATCH_INT_INFO, + .idVendor = 0x29fe, + .idProduct = 0x4d53, + .bInterfaceClass = USB_CLASS_VIDEO, + .bInterfaceSubClass = 1, + .bInterfaceProtocol = 0, + .driver_info = UVC_INFO_QUIRK(UVC_QUIRK_FORCE_BPP) }, /* Intel RealSense D4M */ { .match_flags = USB_DEVICE_ID_MATCH_DEVICE | USB_DEVICE_ID_MATCH_INT_INFO, diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h index f773dc5d802c..6ab972c643e3 100644 --- a/drivers/media/usb/uvc/uvcvideo.h +++ b/drivers/media/usb/uvc/uvcvideo.h @@ -198,6 +198,7 @@ #define UVC_QUIRK_RESTRICT_FRAME_RATE 0x00000200 #define UVC_QUIRK_RESTORE_CTRLS_ON_INIT 0x00000400 #define UVC_QUIRK_FORCE_Y8 0x00000800 +#define UVC_QUIRK_FORCE_BPP 0x00001000 /* Format flags */ #define UVC_FMT_FLAG_COMPRESSED 0x00000001 diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index e1eaf1135c7f..7ad6db8dd9f6 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1183,36 +1183,38 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar u32 aux_space; int compatible_arg = 1; long err = 0; + unsigned int ncmd; /* * 1. When struct size is different, converts the command. 
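The shape of the compat-ioctl conversion, reduced to one command: translate to the native number once, keep every marshalling switch keyed on the compat number the caller actually issued, and hand only the final dispatch the native value. Command values here are made up for the sketch.

#include <stdio.h>

#define CMD_FOO32 0x1001	/* hypothetical 32-bit layout command */
#define CMD_FOO   0x2001	/* its native counterpart */

static long native_ioctl(unsigned int cmd, unsigned long arg)
{
	(void)arg;
	printf("native handler got 0x%x\n", cmd);
	return 0;
}

static long compat_shim(unsigned int cmd, unsigned long arg)
{
	unsigned int ncmd;

	switch (cmd) {			/* translate exactly once */
	case CMD_FOO32: ncmd = CMD_FOO; break;
	default:	ncmd = cmd;	break;
	}

	switch (cmd) {			/* marshal on the compat value */
	case CMD_FOO32:
		/* 32-bit -> native argument conversion would go here */
		break;
	}

	return native_ioctl(ncmd, arg);	/* dispatch on the native value */
}

int main(void) { return compat_shim(CMD_FOO32, 0); }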
*/ switch (cmd) { - case VIDIOC_G_FMT32: cmd = VIDIOC_G_FMT; break; - case VIDIOC_S_FMT32: cmd = VIDIOC_S_FMT; break; - case VIDIOC_QUERYBUF32: cmd = VIDIOC_QUERYBUF; break; - case VIDIOC_G_FBUF32: cmd = VIDIOC_G_FBUF; break; - case VIDIOC_S_FBUF32: cmd = VIDIOC_S_FBUF; break; - case VIDIOC_QBUF32: cmd = VIDIOC_QBUF; break; - case VIDIOC_DQBUF32: cmd = VIDIOC_DQBUF; break; - case VIDIOC_ENUMSTD32: cmd = VIDIOC_ENUMSTD; break; - case VIDIOC_ENUMINPUT32: cmd = VIDIOC_ENUMINPUT; break; - case VIDIOC_TRY_FMT32: cmd = VIDIOC_TRY_FMT; break; - case VIDIOC_G_EXT_CTRLS32: cmd = VIDIOC_G_EXT_CTRLS; break; - case VIDIOC_S_EXT_CTRLS32: cmd = VIDIOC_S_EXT_CTRLS; break; - case VIDIOC_TRY_EXT_CTRLS32: cmd = VIDIOC_TRY_EXT_CTRLS; break; - case VIDIOC_DQEVENT32: cmd = VIDIOC_DQEVENT; break; - case VIDIOC_OVERLAY32: cmd = VIDIOC_OVERLAY; break; - case VIDIOC_STREAMON32: cmd = VIDIOC_STREAMON; break; - case VIDIOC_STREAMOFF32: cmd = VIDIOC_STREAMOFF; break; - case VIDIOC_G_INPUT32: cmd = VIDIOC_G_INPUT; break; - case VIDIOC_S_INPUT32: cmd = VIDIOC_S_INPUT; break; - case VIDIOC_G_OUTPUT32: cmd = VIDIOC_G_OUTPUT; break; - case VIDIOC_S_OUTPUT32: cmd = VIDIOC_S_OUTPUT; break; - case VIDIOC_CREATE_BUFS32: cmd = VIDIOC_CREATE_BUFS; break; - case VIDIOC_PREPARE_BUF32: cmd = VIDIOC_PREPARE_BUF; break; - case VIDIOC_G_EDID32: cmd = VIDIOC_G_EDID; break; - case VIDIOC_S_EDID32: cmd = VIDIOC_S_EDID; break; + case VIDIOC_G_FMT32: ncmd = VIDIOC_G_FMT; break; + case VIDIOC_S_FMT32: ncmd = VIDIOC_S_FMT; break; + case VIDIOC_QUERYBUF32: ncmd = VIDIOC_QUERYBUF; break; + case VIDIOC_G_FBUF32: ncmd = VIDIOC_G_FBUF; break; + case VIDIOC_S_FBUF32: ncmd = VIDIOC_S_FBUF; break; + case VIDIOC_QBUF32: ncmd = VIDIOC_QBUF; break; + case VIDIOC_DQBUF32: ncmd = VIDIOC_DQBUF; break; + case VIDIOC_ENUMSTD32: ncmd = VIDIOC_ENUMSTD; break; + case VIDIOC_ENUMINPUT32: ncmd = VIDIOC_ENUMINPUT; break; + case VIDIOC_TRY_FMT32: ncmd = VIDIOC_TRY_FMT; break; + case VIDIOC_G_EXT_CTRLS32: ncmd = VIDIOC_G_EXT_CTRLS; break; + case VIDIOC_S_EXT_CTRLS32: ncmd = VIDIOC_S_EXT_CTRLS; break; + case VIDIOC_TRY_EXT_CTRLS32: ncmd = VIDIOC_TRY_EXT_CTRLS; break; + case VIDIOC_DQEVENT32: ncmd = VIDIOC_DQEVENT; break; + case VIDIOC_OVERLAY32: ncmd = VIDIOC_OVERLAY; break; + case VIDIOC_STREAMON32: ncmd = VIDIOC_STREAMON; break; + case VIDIOC_STREAMOFF32: ncmd = VIDIOC_STREAMOFF; break; + case VIDIOC_G_INPUT32: ncmd = VIDIOC_G_INPUT; break; + case VIDIOC_S_INPUT32: ncmd = VIDIOC_S_INPUT; break; + case VIDIOC_G_OUTPUT32: ncmd = VIDIOC_G_OUTPUT; break; + case VIDIOC_S_OUTPUT32: ncmd = VIDIOC_S_OUTPUT; break; + case VIDIOC_CREATE_BUFS32: ncmd = VIDIOC_CREATE_BUFS; break; + case VIDIOC_PREPARE_BUF32: ncmd = VIDIOC_PREPARE_BUF; break; + case VIDIOC_G_EDID32: ncmd = VIDIOC_G_EDID; break; + case VIDIOC_S_EDID32: ncmd = VIDIOC_S_EDID; break; + default: ncmd = cmd; break; } /* @@ -1221,11 +1223,11 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * argument into it. 
*/ switch (cmd) { - case VIDIOC_OVERLAY: - case VIDIOC_STREAMON: - case VIDIOC_STREAMOFF: - case VIDIOC_S_INPUT: - case VIDIOC_S_OUTPUT: + case VIDIOC_OVERLAY32: + case VIDIOC_STREAMON32: + case VIDIOC_STREAMOFF32: + case VIDIOC_S_INPUT32: + case VIDIOC_S_OUTPUT32: err = alloc_userspace(sizeof(unsigned int), 0, &new_p64); if (!err && assign_in_user((unsigned int __user *)new_p64, (compat_uint_t __user *)p32)) @@ -1233,23 +1235,23 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_G_INPUT: - case VIDIOC_G_OUTPUT: + case VIDIOC_G_INPUT32: + case VIDIOC_G_OUTPUT32: err = alloc_userspace(sizeof(unsigned int), 0, &new_p64); compatible_arg = 0; break; - case VIDIOC_G_EDID: - case VIDIOC_S_EDID: + case VIDIOC_G_EDID32: + case VIDIOC_S_EDID32: err = alloc_userspace(sizeof(struct v4l2_edid), 0, &new_p64); if (!err) err = get_v4l2_edid32(new_p64, p32); compatible_arg = 0; break; - case VIDIOC_G_FMT: - case VIDIOC_S_FMT: - case VIDIOC_TRY_FMT: + case VIDIOC_G_FMT32: + case VIDIOC_S_FMT32: + case VIDIOC_TRY_FMT32: err = bufsize_v4l2_format(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_format), @@ -1262,7 +1264,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_CREATE_BUFS: + case VIDIOC_CREATE_BUFS32: err = bufsize_v4l2_create(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_create_buffers), @@ -1275,10 +1277,10 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_PREPARE_BUF: - case VIDIOC_QUERYBUF: - case VIDIOC_QBUF: - case VIDIOC_DQBUF: + case VIDIOC_PREPARE_BUF32: + case VIDIOC_QUERYBUF32: + case VIDIOC_QBUF32: + case VIDIOC_DQBUF32: err = bufsize_v4l2_buffer(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_buffer), @@ -1291,7 +1293,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_S_FBUF: + case VIDIOC_S_FBUF32: err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, &new_p64); if (!err) @@ -1299,13 +1301,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_G_FBUF: + case VIDIOC_G_FBUF32: err = alloc_userspace(sizeof(struct v4l2_framebuffer), 0, &new_p64); compatible_arg = 0; break; - case VIDIOC_ENUMSTD: + case VIDIOC_ENUMSTD32: err = alloc_userspace(sizeof(struct v4l2_standard), 0, &new_p64); if (!err) @@ -1313,16 +1315,16 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar compatible_arg = 0; break; - case VIDIOC_ENUMINPUT: + case VIDIOC_ENUMINPUT32: err = alloc_userspace(sizeof(struct v4l2_input), 0, &new_p64); if (!err) err = get_v4l2_input32(new_p64, p32); compatible_arg = 0; break; - case VIDIOC_G_EXT_CTRLS: - case VIDIOC_S_EXT_CTRLS: - case VIDIOC_TRY_EXT_CTRLS: + case VIDIOC_G_EXT_CTRLS32: + case VIDIOC_S_EXT_CTRLS32: + case VIDIOC_TRY_EXT_CTRLS32: err = bufsize_v4l2_ext_controls(p32, &aux_space); if (!err) err = alloc_userspace(sizeof(struct v4l2_ext_controls), @@ -1334,7 +1336,7 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar } compatible_arg = 0; break; - case VIDIOC_DQEVENT: + case VIDIOC_DQEVENT32: err = alloc_userspace(sizeof(struct v4l2_event), 0, &new_p64); compatible_arg = 0; break; @@ -1352,9 +1354,9 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long 
ar * Otherwise, it will pass the newly allocated @new_p64 argument. */ if (compatible_arg) - err = native_ioctl(file, cmd, (unsigned long)p32); + err = native_ioctl(file, ncmd, (unsigned long)p32); else - err = native_ioctl(file, cmd, (unsigned long)new_p64); + err = native_ioctl(file, ncmd, (unsigned long)new_p64); if (err == -ENOTTY) return err; @@ -1370,13 +1372,13 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * the blocks to maximum allowed value. */ switch (cmd) { - case VIDIOC_G_EXT_CTRLS: - case VIDIOC_S_EXT_CTRLS: - case VIDIOC_TRY_EXT_CTRLS: + case VIDIOC_G_EXT_CTRLS32: + case VIDIOC_S_EXT_CTRLS32: + case VIDIOC_TRY_EXT_CTRLS32: if (put_v4l2_ext_controls32(file, new_p64, p32)) err = -EFAULT; break; - case VIDIOC_S_EDID: + case VIDIOC_S_EDID32: if (put_v4l2_edid32(new_p64, p32)) err = -EFAULT; break; @@ -1389,49 +1391,49 @@ static long do_video_ioctl(struct file *file, unsigned int cmd, unsigned long ar * the original 32 bits structure. */ switch (cmd) { - case VIDIOC_S_INPUT: - case VIDIOC_S_OUTPUT: - case VIDIOC_G_INPUT: - case VIDIOC_G_OUTPUT: + case VIDIOC_S_INPUT32: + case VIDIOC_S_OUTPUT32: + case VIDIOC_G_INPUT32: + case VIDIOC_G_OUTPUT32: if (assign_in_user((compat_uint_t __user *)p32, ((unsigned int __user *)new_p64))) err = -EFAULT; break; - case VIDIOC_G_FBUF: + case VIDIOC_G_FBUF32: err = put_v4l2_framebuffer32(new_p64, p32); break; - case VIDIOC_DQEVENT: + case VIDIOC_DQEVENT32: err = put_v4l2_event32(new_p64, p32); break; - case VIDIOC_G_EDID: + case VIDIOC_G_EDID32: err = put_v4l2_edid32(new_p64, p32); break; - case VIDIOC_G_FMT: - case VIDIOC_S_FMT: - case VIDIOC_TRY_FMT: + case VIDIOC_G_FMT32: + case VIDIOC_S_FMT32: + case VIDIOC_TRY_FMT32: err = put_v4l2_format32(new_p64, p32); break; - case VIDIOC_CREATE_BUFS: + case VIDIOC_CREATE_BUFS32: err = put_v4l2_create32(new_p64, p32); break; - case VIDIOC_PREPARE_BUF: - case VIDIOC_QUERYBUF: - case VIDIOC_QBUF: - case VIDIOC_DQBUF: + case VIDIOC_PREPARE_BUF32: + case VIDIOC_QUERYBUF32: + case VIDIOC_QBUF32: + case VIDIOC_DQBUF32: err = put_v4l2_buffer32(new_p64, p32); break; - case VIDIOC_ENUMSTD: + case VIDIOC_ENUMSTD32: err = put_v4l2_standard32(new_p64, p32); break; - case VIDIOC_ENUMINPUT: + case VIDIOC_ENUMINPUT32: err = put_v4l2_input32(new_p64, p32); break; } diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index 1afd9c6ad908..cc34c5ab7009 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -880,12 +880,12 @@ int v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev, goto err_rel_entity1; /* Connect the three entities */ - ret = media_create_pad_link(m2m_dev->source, 0, &m2m_dev->proc, 1, + ret = media_create_pad_link(m2m_dev->source, 0, &m2m_dev->proc, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) goto err_rel_entity2; - ret = media_create_pad_link(&m2m_dev->proc, 0, &m2m_dev->sink, 0, + ret = media_create_pad_link(&m2m_dev->proc, 1, &m2m_dev->sink, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) goto err_rm_links0; diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 66a6c6c236a7..28262190c3ab 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -349,8 +349,11 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) BUG_ON(dma->sglen); if (dma->pages) { - for (i = 0; i < dma->nr_pages; i++) + for (i = 0; i < dma->nr_pages; i++) { + if 
(dma->direction == DMA_FROM_DEVICE) + set_page_dirty_lock(dma->pages[i]); put_page(dma->pages[i]); + } kfree(dma->pages); dma->pages = NULL; } diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 420900852166..c366503c466d 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -758,6 +758,7 @@ config MFD_MAX77650 depends on OF || COMPILE_TEST select MFD_CORE select REGMAP_I2C + select REGMAP_IRQ help Say Y here to add support for Maxim Semiconductor MAX77650 and MAX77651 Power Management ICs. This is the core multifunction diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c index bafc729fc434..3c6fda68e6bc 100644 --- a/drivers/mfd/ab8500-core.c +++ b/drivers/mfd/ab8500-core.c @@ -631,8 +631,8 @@ static const struct mfd_cell ab8500_devs[] = { NULL, NULL, 0, 0, "stericsson,ab8500-ext-regulator"), OF_MFD_CELL("ab8500-regulator", NULL, NULL, 0, 0, "stericsson,ab8500-regulator"), - OF_MFD_CELL("abx500-clk", - NULL, NULL, 0, 0, "stericsson,abx500-clk"), + OF_MFD_CELL("ab8500-clk", + NULL, NULL, 0, 0, "stericsson,ab8500-clk"), OF_MFD_CELL("ab8500-gpadc", NULL, NULL, 0, 0, "stericsson,ab8500-gpadc"), OF_MFD_CELL("ab8500-rtc", diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c index a4aaadaa0cb0..aa59496e4376 100644 --- a/drivers/mfd/axp20x.c +++ b/drivers/mfd/axp20x.c @@ -126,7 +126,7 @@ static const struct regmap_range axp288_writeable_ranges[] = { static const struct regmap_range axp288_volatile_ranges[] = { regmap_reg_range(AXP20X_PWR_INPUT_STATUS, AXP288_POWER_REASON), regmap_reg_range(AXP288_BC_GLOBAL, AXP288_BC_GLOBAL), - regmap_reg_range(AXP288_BC_DET_STAT, AXP288_BC_DET_STAT), + regmap_reg_range(AXP288_BC_DET_STAT, AXP20X_VBUS_IPSOUT_MGMT), regmap_reg_range(AXP20X_CHRG_BAK_CTRL, AXP20X_CHRG_BAK_CTRL), regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IPSOUT_V_HIGH_L), regmap_reg_range(AXP20X_TIMER_CTRL, AXP20X_TIMER_CTRL), diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c index e69626867c26..9143de7b77b8 100644 --- a/drivers/mfd/da9062-core.c +++ b/drivers/mfd/da9062-core.c @@ -248,7 +248,7 @@ static const struct mfd_cell da9062_devs[] = { .name = "da9062-watchdog", .num_resources = ARRAY_SIZE(da9062_wdt_resources), .resources = da9062_wdt_resources, - .of_compatible = "dlg,da9062-wdt", + .of_compatible = "dlg,da9062-watchdog", }, { .name = "da9062-thermal", diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 381593fbe50f..7841c11411d0 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -722,6 +722,8 @@ static int dln2_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct usb_host_interface *hostif = interface->cur_altsetting; + struct usb_endpoint_descriptor *epin; + struct usb_endpoint_descriptor *epout; struct device *dev = &interface->dev; struct dln2_dev *dln2; int ret; @@ -731,12 +733,19 @@ static int dln2_probe(struct usb_interface *interface, hostif->desc.bNumEndpoints < 2) return -ENODEV; + epin = &hostif->endpoint[0].desc; + epout = &hostif->endpoint[1].desc; + if (!usb_endpoint_is_bulk_out(epout)) + return -ENODEV; + if (!usb_endpoint_is_bulk_in(epin)) + return -ENODEV; + dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); if (!dln2) return -ENOMEM; - dln2->ep_out = hostif->endpoint[0].desc.bEndpointAddress; - dln2->ep_in = hostif->endpoint[1].desc.bEndpointAddress; + dln2->ep_out = epout->bEndpointAddress; + dln2->ep_in = epin->bEndpointAddress; dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dln2->interface = interface; usb_set_intfdata(interface, dln2); diff --git 
a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index da5cd9c92a59..ead2e79036a9 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -26,6 +26,7 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg) case RN5T618_WATCHDOGCNT: case RN5T618_DCIRQ: case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL: + case RN5T618_ADCCNT3: case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3: case RN5T618_IR_GPR: case RN5T618_IR_GPF: diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index 25e5f24b3fec..5bdf57472314 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2112,8 +2112,8 @@ static int altera_execute(struct altera_state *astate, return status; } -static int altera_get_note(u8 *p, s32 program_size, - s32 *offset, char *key, char *value, int length) +static int altera_get_note(u8 *p, s32 program_size, s32 *offset, + char *key, char *value, int keylen, int vallen) /* * Gets key and value of NOTE fields in the JBC file. * Can be called in two modes: if offset pointer is NULL, @@ -2170,7 +2170,7 @@ static int altera_get_note(u8 *p, s32 program_size, &p[note_table + (8 * i) + 4])]; if (value != NULL) - strlcpy(value, value_ptr, length); + strlcpy(value, value_ptr, vallen); } } @@ -2189,13 +2189,13 @@ static int altera_get_note(u8 *p, s32 program_size, strlcpy(key, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])], - length); + keylen); if (value != NULL) strlcpy(value, &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i) + 4])], - length); + vallen); *offset = i + 1; } @@ -2449,7 +2449,7 @@ int altera_init(struct altera_config *config, const struct firmware *fw) __func__, (format_version == 2) ? "Jam STAPL" : "pre-standardized Jam 1.1"); while (altera_get_note((u8 *)fw->data, fw->size, - &offset, key, value, 256) == 0) + &offset, key, value, 32, 256) == 0) printk(KERN_INFO "%s: NOTE \"%s\" = \"%s\"\n", __func__, key, value); } diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index 4feed296a327..423fecc19fc4 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -394,7 +394,7 @@ static const struct pcr_ops rts522a_pcr_ops = { void rts522a_init_params(struct rtsx_pcr *pcr) { rts5227_init_params(pcr); - + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11); pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; pcr->option.ocp_en = 1; diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index db936e4d6e56..1a81cda948c1 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -618,6 +618,7 @@ static const struct pcr_ops rts524a_pcr_ops = { void rts524a_init_params(struct rtsx_pcr *pcr) { rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; pcr->option.ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; @@ -733,6 +734,7 @@ static const struct pcr_ops rts525a_pcr_ops = { void rts525a_init_params(struct rtsx_pcr *pcr) { rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11); pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; pcr->option.ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 4214f02a17fd..711054ebad74 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -662,7 +662,7 @@ void 
rts5260_init_params(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); pcr->ic_version = rts5260_get_ic_version(pcr); diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c index 32dcec2e9dfd..a9ba0115ff00 100644 --- a/drivers/misc/cardreader/rts5261.c +++ b/drivers/misc/cardreader/rts5261.c @@ -763,7 +763,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11); pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); pcr->ic_version = rts5261_get_ic_version(pcr); diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index b155e9549076..b680b0caa69b 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -598,7 +598,9 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) goto out; } - hdev->asic_funcs->halt_coresight(hdev); + if (!hdev->hard_reset_pending) + hdev->asic_funcs->halt_coresight(hdev); + hdev->in_debug = 0; goto out; @@ -1189,6 +1191,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { dev_info(hdev->dev, "H/W state is dirty, must reset before initializing\n"); + hdev->asic_funcs->halt_engines(hdev, true); hdev->asic_funcs->hw_fini(hdev, true); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 7344e8a222ae..b8a8de24aaf7 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -895,6 +895,11 @@ void goya_init_dma_qmans(struct hl_device *hdev) */ static void goya_disable_external_queues(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_DMA)) + return; + WREG32(mmDMA_QM_0_GLBL_CFG0, 0); WREG32(mmDMA_QM_1_GLBL_CFG0, 0); WREG32(mmDMA_QM_2_GLBL_CFG0, 0); @@ -956,6 +961,11 @@ static int goya_stop_external_queues(struct hl_device *hdev) { int rc, retval = 0; + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_DMA)) + return retval; + rc = goya_stop_queue(hdev, mmDMA_QM_0_GLBL_CFG1, mmDMA_QM_0_CP_STS, @@ -1744,9 +1754,18 @@ void goya_init_tpc_qmans(struct hl_device *hdev) */ static void goya_disable_internal_queues(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_MME)) + goto disable_tpc; + WREG32(mmMME_QM_GLBL_CFG0, 0); WREG32(mmMME_CMDQ_GLBL_CFG0, 0); +disable_tpc: + if (!(goya->hw_cap_initialized & HW_CAP_TPC)) + return; + WREG32(mmTPC0_QM_GLBL_CFG0, 0); WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0); @@ -1782,8 +1801,12 @@ static void goya_disable_internal_queues(struct hl_device *hdev) */ static int goya_stop_internal_queues(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; int rc, retval = 0; + if (!(goya->hw_cap_initialized & HW_CAP_MME)) + goto stop_tpc; + /* * Each queue (QMAN) is a separate H/W logic. 
That means that each * QMAN can be stopped independently and failure to stop one does NOT @@ -1810,6 +1833,10 @@ static int goya_stop_internal_queues(struct hl_device *hdev) retval = -EIO; } +stop_tpc: + if (!(goya->hw_cap_initialized & HW_CAP_TPC)) + return retval; + rc = goya_stop_queue(hdev, mmTPC0_QM_GLBL_CFG1, mmTPC0_QM_CP_STS, @@ -1975,6 +2002,11 @@ static int goya_stop_internal_queues(struct hl_device *hdev) static void goya_dma_stall(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_DMA)) + return; + WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT); WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT); WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT); @@ -1984,6 +2016,11 @@ static void goya_dma_stall(struct hl_device *hdev) static void goya_tpc_stall(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_TPC)) + return; + WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT); WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT); @@ -1996,6 +2033,11 @@ static void goya_tpc_stall(struct hl_device *hdev) static void goya_mme_stall(struct hl_device *hdev) { + struct goya_device *goya = hdev->asic_specific; + + if (!(goya->hw_cap_initialized & HW_CAP_MME)) + return; + WREG32(mmMME_STALL, 0xFFFFFFFF); } @@ -4648,8 +4690,6 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, rc = goya_send_job_on_qman0(hdev, job); - hl_cb_put(job->patched_cb); - hl_debugfs_remove_job(hdev, job); kfree(job); cb->cs_cnt--; diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c index 93027fd96c71..4c596c646ac0 100644 --- a/drivers/misc/mei/hdcp/mei_hdcp.c +++ b/drivers/misc/mei/hdcp/mei_hdcp.c @@ -757,11 +757,38 @@ static const struct component_master_ops mei_component_master_ops = { .unbind = mei_component_master_unbind, }; +/** + * mei_hdcp_component_match - compare function for matching mei hdcp. + * + * The function checks if the driver is i915, the subcomponent is HDCP + * and the grandparent of hdcp and the parent of i915 are the same + * PCH device.
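A self-contained model of the ancestry walk described above: two parent hops from the mei hdcp device must land on the same node as one hop from the i915 device. The struct and names are stand-ins for struct device.

#include <stdbool.h>
#include <stddef.h>

struct node { struct node *parent; };

static bool same_pch(struct node *i915, struct node *hdcp)
{
	struct node *base = hdcp->parent;	/* hdcp -> mei bus device */
	struct node *dev = i915->parent;	/* i915 -> its PCI parent */

	if (!base)
		return false;
	base = base->parent;			/* mei bus device -> PCH */

	return base && dev && dev == base;
}

int main(void)
{
	struct node pch  = { NULL };
	struct node mei  = { &pch };
	struct node hdcp = { &mei };
	struct node i915 = { &pch };

	return same_pch(&i915, &hdcp) ? 0 : 1;	/* shared PCH: exit 0 */
}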
+ * + * @dev: master device + * @subcomponent: subcomponent to match (I915_COMPONENT_HDCP) + * @data: compare data (mei hdcp device) + * + * Return: + * * 1 - if components match + * * 0 - otherwise + */ static int mei_hdcp_component_match(struct device *dev, int subcomponent, void *data) { - return !strcmp(dev->driver->name, "i915") && - subcomponent == I915_COMPONENT_HDCP; + struct device *base = data; + + if (strcmp(dev->driver->name, "i915") || + subcomponent != I915_COMPONENT_HDCP) + return 0; + + base = base->parent; + if (!base) + return 0; + + base = base->parent; + dev = dev->parent; + + return (base && dev && dev == base); } static int mei_hdcp_probe(struct mei_cl_device *cldev, @@ -785,7 +812,7 @@ static int mei_hdcp_probe(struct mei_cl_device *cldev, master_match = NULL; component_match_add_typed(&cldev->dev, &master_match, - mei_hdcp_component_match, comp_master); + mei_hdcp_component_match, &cldev->dev); if (IS_ERR_OR_NULL(master_match)) { ret = -ENOMEM; goto err_exit; diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 7cd67fb2365d..87a0201ba6b3 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -81,10 +81,16 @@ #define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ #define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ + #define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */ +#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ +#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ + #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ + #define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ #define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index c845b7e40f26..2711451b3d87 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -99,11 +99,15 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_JSP_N, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)}, diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c index 11835969e982..48ba7e02bed7 100644 --- a/drivers/misc/xilinx_sdfec.c +++ b/drivers/misc/xilinx_sdfec.c @@ -1025,25 +1025,25 @@ static long xsdfec_dev_compat_ioctl(struct file *file, unsigned int cmd, } #endif -static unsigned int xsdfec_poll(struct file *file, poll_table *wait) +static __poll_t xsdfec_poll(struct file *file, poll_table *wait) { - unsigned int mask = 0; + __poll_t mask = 0; struct xsdfec_dev *xsdfec; xsdfec = container_of(file->private_data, struct xsdfec_dev, miscdev); if (!xsdfec) - return POLLNVAL | POLLHUP; + return EPOLLNVAL | EPOLLHUP; poll_wait(file, &xsdfec->waitq, wait); /* XSDFEC ISR detected an error */ spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags); if (xsdfec->state_updated) - mask |= POLLIN | POLLPRI; + mask |= EPOLLIN | EPOLLPRI; if (xsdfec->stats_updated) - mask |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags); 
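For reference, the EPOLL* values returned by the converted poll handler are what a userspace caller sees in revents; a minimal consumer might look like this (the device node path is an assumption, not taken from the driver):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/xsdfec0", O_RDONLY);	/* hypothetical node name */
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLPRI | POLLRDNORM };

	if (fd < 0 || poll(&pfd, 1, -1) < 0)
		return 1;
	if (pfd.revents & POLLPRI)
		puts("state updated");	/* EPOLLIN | EPOLLPRI in the driver */
	if (pfd.revents & POLLRDNORM)
		puts("stats updated");	/* EPOLLIN | EPOLLRDNORM */
	close(fd);
	return 0;
}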
return mask; diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 105b7a7c0251..b3484def0a8b 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -176,7 +176,6 @@ int mmc_of_parse(struct mmc_host *host) u32 bus_width, drv_type, cd_debounce_delay_ms; int ret; bool cd_cap_invert, cd_gpio_invert = false; - bool ro_cap_invert, ro_gpio_invert = false; if (!dev || !dev_fwnode(dev)) return 0; @@ -255,9 +254,11 @@ int mmc_of_parse(struct mmc_host *host) } /* Parse Write Protection */ - ro_cap_invert = device_property_read_bool(dev, "wp-inverted"); - ret = mmc_gpiod_request_ro(host, "wp", 0, 0, &ro_gpio_invert); + if (device_property_read_bool(dev, "wp-inverted")) + host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; + + ret = mmc_gpiod_request_ro(host, "wp", 0, 0, NULL); if (!ret) dev_info(host->parent, "Got WP GPIO\n"); else if (ret != -ENOENT && ret != -ENOSYS) @@ -266,10 +267,6 @@ int mmc_of_parse(struct mmc_host *host) if (device_property_read_bool(dev, "disable-wp")) host->caps2 |= MMC_CAP2_NO_WRITE_PROTECT; - /* See the comment on CD inversion above */ - if (ro_cap_invert ^ ro_gpio_invert) - host->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - if (device_property_read_bool(dev, "cap-sd-highspeed")) host->caps |= MMC_CAP_SD_HIGHSPEED; if (device_property_read_bool(dev, "cap-mmc-highspeed")) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index da2596c5fa28..582ec3d720f6 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -241,6 +241,9 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, return ret; } + if (host->caps2 & MMC_CAP2_RO_ACTIVE_HIGH) + gpiod_toggle_active_low(desc); + if (gpio_invert) *gpio_invert = !gpiod_is_active_low(desc); diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 74c6cfbf9172..1f02f54f09c0 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1134,17 +1134,22 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host) * SPI protocol. Another is that when chipselect is released while * the card returns BUSY status, the clock must issue several cycles * with chipselect high before the card will stop driving its output. + * + * SPI_CS_HIGH means "asserted" here. In some cases like when using + * GPIOs for chip select, SPI_CS_HIGH is set but this will be logically + * inverted by gpiolib, so if we want to make sure it is driven high + * we should toggle the default with an XOR as we do here. */ - host->spi->mode |= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Just warn; most cards work without it. */ dev_warn(&host->spi->dev, "can't change chip-select polarity\n"); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; } else { mmc_spi_readbytes(host, 18); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Wot, we can't get the same setup we had before?
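The switch from |=/&=~ to ^= above matters because gpiolib may already have inverted the logical sense of SPI_CS_HIGH; XOR flips relative to whichever default was in effect, and a second XOR restores it exactly. The property in isolation:

#include <assert.h>

#define SPI_CS_HIGH 0x04u

int main(void)
{
	unsigned int mode = SPI_CS_HIGH;	/* either default works here */
	unsigned int saved = mode;

	mode ^= SPI_CS_HIGH;	/* request the opposite polarity */
	mode ^= SPI_CS_HIGH;	/* restore, whatever the default was */
	assert(mode == saved);
	return 0;
}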
*/ dev_err(&host->spi->dev, diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 024acc1b0a2e..b2bbcb09a49e 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -740,16 +740,16 @@ static int pxamci_probe(struct platform_device *pdev) goto out; } + if (!host->pdata->gpio_card_ro_invert) + mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; + ret = mmc_gpiod_request_ro(mmc, "wp", 0, 0, NULL); if (ret && ret != -ENOENT) { dev_err(dev, "Failed requesting gpio_ro\n"); goto out; } - if (!ret) { + if (!ret) host->use_ro_gpio = true; - mmc->caps2 |= host->pdata->gpio_card_ro_invert ? - 0 : MMC_CAP2_RO_ACTIVE_HIGH; - } if (host->pdata->init) host->pdata->init(dev, pxamci_detect_irq, mmc); diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index bd50935dc37d..11087976ab19 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -606,19 +606,22 @@ static int sd_change_phase(struct realtek_pci_sdmmc *host, u8 sample_point, bool rx) { struct rtsx_pcr *pcr = host->pcr; - + u16 SD_VP_CTL = 0; dev_dbg(sdmmc_dev(host), "%s(%s): sample_point = %d\n", __func__, rx ? "RX" : "TX", sample_point); rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, CHANGE_CLK); - if (rx) + if (rx) { + SD_VP_CTL = SD_VPRX_CTL; rtsx_pci_write_register(pcr, SD_VPRX_CTL, PHASE_SELECT_MASK, sample_point); - else + } else { + SD_VP_CTL = SD_VPTX_CTL; rtsx_pci_write_register(pcr, SD_VPTX_CTL, PHASE_SELECT_MASK, sample_point); - rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET, 0); - rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET, + } + rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET, 0); + rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET, PHASE_NOT_RESET); rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, 0); rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 105e73d4a3b9..5d64d9c12ce9 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -72,9 +73,16 @@ struct sdhci_acpi_host { const struct sdhci_acpi_slot *slot; struct platform_device *pdev; bool use_runtime_pm; + bool is_intel; + bool reset_signal_volt_on_suspend; unsigned long private[0] ____cacheline_aligned; }; +enum { + DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP = BIT(0), + DMI_QUIRK_SD_NO_WRITE_PROTECT = BIT(1), +}; + static inline void *sdhci_acpi_priv(struct sdhci_acpi_host *c) { return (void *)c->private; @@ -391,6 +399,8 @@ static int intel_probe_slot(struct platform_device *pdev, struct acpi_device *ad host->mmc_host_ops.start_signal_voltage_switch = intel_start_signal_voltage_switch; + c->is_intel = true; + return 0; } @@ -647,6 +657,36 @@ static const struct acpi_device_id sdhci_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids); +static const struct dmi_system_id sdhci_acpi_quirks[] = { + { + /* + * The Lenovo Miix 320-10ICR has a bug in the _PS0 method of + * the SHC1 ACPI device, this bug causes it to reprogram the + * wrong LDO (DLDO3) to 1.8V if 1.8V modes are used and the + * card is (runtime) suspended + resumed. DLDO3 is used for + * the LCD and setting it to 1.8V causes the LCD to go black. 
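The DMI table used by sdhci-acpi maps board identity strings to a bitmask of overrides, with a first-match lookup and an empty default. A self-contained model of that lookup (strings shortened, flag names reused only for clarity):

#include <stdio.h>
#include <string.h>

#define QUIRK_RESET_SD_SIGNAL_VOLT (1L << 0)
#define QUIRK_SD_NO_WRITE_PROTECT  (1L << 1)

struct quirk { const char *product; long flags; };

static const struct quirk quirks[] = {
	{ "Lenovo MIIX 320-10ICR", QUIRK_RESET_SD_SIGNAL_VOLT },
	{ "Aspire SW5-012",        QUIRK_SD_NO_WRITE_PROTECT  },
	{ NULL, 0 }	/* terminating entry, as in dmi_system_id tables */
};

static long first_match(const char *product)
{
	for (const struct quirk *q = quirks; q->product; q++)
		if (!strcmp(q->product, product))
			return q->flags;
	return 0;	/* unknown board: no overrides */
}

int main(void)
{
	printf("quirks=0x%lx\n", first_match("Aspire SW5-012"));
	return 0;
}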
+ */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), + }, + .driver_data = (void *)DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP, + }, + { + /* + * The Acer Aspire Switch 10 (SW5-012) microSD slot always + * reports the card being write-protected even though microSD + * cards do not have a write-protect switch at all. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), + }, + .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT, + }, + {} /* Terminating entry */ +}; + static const struct sdhci_acpi_slot *sdhci_acpi_get_slot(struct acpi_device *adev) { const struct sdhci_acpi_uid_slot *u; @@ -663,17 +703,23 @@ static int sdhci_acpi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct sdhci_acpi_slot *slot; struct acpi_device *device, *child; + const struct dmi_system_id *id; struct sdhci_acpi_host *c; struct sdhci_host *host; struct resource *iomem; resource_size_t len; size_t priv_size; + int quirks = 0; int err; device = ACPI_COMPANION(dev); if (!device) return -ENODEV; + id = dmi_first_match(sdhci_acpi_quirks); + if (id) + quirks = (long)id->driver_data; + slot = sdhci_acpi_get_slot(device); /* Power on the SDHCI controller and its children */ @@ -759,6 +805,12 @@ static int sdhci_acpi_probe(struct platform_device *pdev) dev_warn(dev, "failed to setup card detect gpio\n"); c->use_runtime_pm = false; } + + if (quirks & DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP) + c->reset_signal_volt_on_suspend = true; + + if (quirks & DMI_QUIRK_SD_NO_WRITE_PROTECT) + host->mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT; } err = sdhci_setup_host(host); @@ -823,17 +875,39 @@ static int sdhci_acpi_remove(struct platform_device *pdev) return 0; } +static void __maybe_unused sdhci_acpi_reset_signal_voltage_if_needed( + struct device *dev) +{ + struct sdhci_acpi_host *c = dev_get_drvdata(dev); + struct sdhci_host *host = c->host; + + if (c->is_intel && c->reset_signal_volt_on_suspend && + host->mmc->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_330) { + struct intel_host *intel_host = sdhci_acpi_priv(c); + unsigned int fn = INTEL_DSM_V33_SWITCH; + u32 result = 0; + + intel_dsm(intel_host, dev, fn, &result); + } +} + #ifdef CONFIG_PM_SLEEP static int sdhci_acpi_suspend(struct device *dev) { struct sdhci_acpi_host *c = dev_get_drvdata(dev); struct sdhci_host *host = c->host; + int ret; if (host->tuning_mode != SDHCI_TUNING_MODE_3) mmc_retune_needed(host->mmc); - return sdhci_suspend_host(host); + ret = sdhci_suspend_host(host); + if (ret) + return ret; + + sdhci_acpi_reset_signal_voltage_if_needed(dev); + return 0; } static int sdhci_acpi_resume(struct device *dev) @@ -853,11 +927,17 @@ static int sdhci_acpi_runtime_suspend(struct device *dev) { struct sdhci_acpi_host *c = dev_get_drvdata(dev); struct sdhci_host *host = c->host; + int ret; if (host->tuning_mode != SDHCI_TUNING_MODE_3) mmc_retune_needed(host->mmc); - return sdhci_runtime_suspend_host(host); + ret = sdhci_runtime_suspend_host(host); + if (ret) + return ret; + + sdhci_acpi_reset_signal_voltage_if_needed(dev); + return 0; } static int sdhci_acpi_runtime_resume(struct device *dev) diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c index ae0ec27dd7cc..5f2e9696ee4d 100644 --- a/drivers/mmc/host/sdhci-cadence.c +++ b/drivers/mmc/host/sdhci-cadence.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "sdhci-pltfm.h" @@ -235,6 +236,11 @@ static const struct sdhci_ops sdhci_cdns_ops = { 
.set_uhs_signaling = sdhci_cdns_set_uhs_signaling, }; +static const struct sdhci_pltfm_data sdhci_cdns_uniphier_pltfm_data = { + .ops = &sdhci_cdns_ops, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, +}; + static const struct sdhci_pltfm_data sdhci_cdns_pltfm_data = { .ops = &sdhci_cdns_ops, }; @@ -334,6 +340,7 @@ static void sdhci_cdns_hs400_enhanced_strobe(struct mmc_host *mmc, static int sdhci_cdns_probe(struct platform_device *pdev) { struct sdhci_host *host; + const struct sdhci_pltfm_data *data; struct sdhci_pltfm_host *pltfm_host; struct sdhci_cdns_priv *priv; struct clk *clk; @@ -350,8 +357,12 @@ static int sdhci_cdns_probe(struct platform_device *pdev) if (ret) return ret; + data = of_device_get_match_data(dev); + if (!data) + data = &sdhci_cdns_pltfm_data; + nr_phy_params = sdhci_cdns_phy_param_count(dev->of_node); - host = sdhci_pltfm_init(pdev, &sdhci_cdns_pltfm_data, + host = sdhci_pltfm_init(pdev, data, struct_size(priv, phy_params, nr_phy_params)); if (IS_ERR(host)) { ret = PTR_ERR(host); @@ -431,7 +442,10 @@ static const struct dev_pm_ops sdhci_cdns_pm_ops = { }; static const struct of_device_id sdhci_cdns_match[] = { - { .compatible = "socionext,uniphier-sd4hc" }, + { + .compatible = "socionext,uniphier-sd4hc", + .data = &sdhci_cdns_uniphier_pltfm_data, + }, { .compatible = "cdns,sd4hc" }, { /* sentinel */ } }; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 1c988d6a2433..dccb4df46512 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1381,13 +1381,14 @@ static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev, host->mmc->parent->platform_data); /* write_protect */ if (boarddata->wp_type == ESDHC_WP_GPIO) { + host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; + err = mmc_gpiod_request_ro(host->mmc, "wp", 0, 0, NULL); if (err) { dev_err(mmc_dev(host->mmc), "failed to request write-protect gpio!\n"); return err; } - host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; } /* card_detect */ diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 5959e394b416..d9da141e325f 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -125,7 +125,8 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) sdhci_reset(host, mask); - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); if (priv->cal_always_on && (mask & SDHCI_RESET_ALL)) @@ -335,19 +336,22 @@ static int sdhci_at91_probe(struct platform_device *pdev) priv->mainck = devm_clk_get(&pdev->dev, "baseclk"); if (IS_ERR(priv->mainck)) { dev_err(&pdev->dev, "failed to get baseclk\n"); - return PTR_ERR(priv->mainck); + ret = PTR_ERR(priv->mainck); + goto sdhci_pltfm_free; } priv->hclock = devm_clk_get(&pdev->dev, "hclock"); if (IS_ERR(priv->hclock)) { dev_err(&pdev->dev, "failed to get hclock\n"); - return PTR_ERR(priv->hclock); + ret = PTR_ERR(priv->hclock); + goto sdhci_pltfm_free; } priv->gck = devm_clk_get(&pdev->dev, "multclk"); if (IS_ERR(priv->gck)) { dev_err(&pdev->dev, "failed to get multclk\n"); - return PTR_ERR(priv->gck); + ret = PTR_ERR(priv->gck); + goto sdhci_pltfm_free; } ret = sdhci_at91_set_clks_presets(&pdev->dev); @@ -413,8 +417,11 @@ static int sdhci_at91_probe(struct platform_device *pdev) * detection procedure using the SDMCC_CD signal is bypassed. 
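The sdhci-cadence hunk above is an instance of the usual OF match-data fallback: take the per-compatible descriptor when the match table supplies one, otherwise the generic default. Modelled without the OF layer (the selector below stands in for of_device_get_match_data()):

#include <stdio.h>

struct pltfm_data { unsigned int quirks2; };

static const struct pltfm_data generic_data  = { .quirks2 = 0 };
static const struct pltfm_data uniphier_data = { .quirks2 = 1 };	/* e.g. broken presets */

static const struct pltfm_data *match_data(int is_uniphier)
{
	return is_uniphier ? &uniphier_data : NULL;	/* NULL: no .data in table */
}

int main(void)
{
	const struct pltfm_data *data = match_data(0);

	if (!data)
		data = &generic_data;	/* plain "cdns,sd4hc" keeps old behaviour */
	printf("quirks2=%u\n", data->quirks2);
	return 0;
}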
* This bit is reset when a software reset for all command is performed * so we need to implement our own reset function to set back this bit. + * + * WA: SAMA5D2 doesn't drive CMD if using CD GPIO line. */ - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); pm_runtime_put_autosuspend(&pdev->dev); diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 5eea8d70a85d..ce15a05f23d4 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -262,10 +262,26 @@ static int gl9750_execute_tuning(struct sdhci_host *host, u32 opcode) return 0; } +static void gli_pcie_enable_msi(struct sdhci_pci_slot *slot) +{ + int ret; + + ret = pci_alloc_irq_vectors(slot->chip->pdev, 1, 1, + PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (ret < 0) { + pr_warn("%s: enable PCI MSI failed, error=%d\n", + mmc_hostname(slot->host->mmc), ret); + return; + } + + slot->host->irq = pci_irq_vector(slot->chip->pdev, 0); +} + static int gli_probe_slot_gl9750(struct sdhci_pci_slot *slot) { struct sdhci_host *host = slot->host; + gli_pcie_enable_msi(slot); slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; sdhci_enable_v4_mode(host); @@ -276,6 +292,7 @@ static int gli_probe_slot_gl9755(struct sdhci_pci_slot *slot) { struct sdhci_host *host = slot->host; + gli_pcie_enable_msi(slot); slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO; sdhci_enable_v4_mode(host); diff --git a/drivers/mtd/nand/onenand/onenand_base.c b/drivers/mtd/nand/onenand/onenand_base.c index 85640ee11c86..d5326d19b136 100644 --- a/drivers/mtd/nand/onenand/onenand_base.c +++ b/drivers/mtd/nand/onenand/onenand_base.c @@ -1248,44 +1248,44 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from, stats = mtd->ecc_stats; - /* Read-while-load method */ + /* Read-while-load method */ - /* Do first load to bufferRAM */ - if (read < len) { - if (!onenand_check_bufferram(mtd, from)) { + /* Do first load to bufferRAM */ + if (read < len) { + if (!onenand_check_bufferram(mtd, from)) { this->command(mtd, ONENAND_CMD_READ, from, writesize); - ret = this->wait(mtd, FL_READING); - onenand_update_bufferram(mtd, from, !ret); + ret = this->wait(mtd, FL_READING); + onenand_update_bufferram(mtd, from, !ret); if (mtd_is_eccerr(ret)) ret = 0; - } - } + } + } thislen = min_t(int, writesize, len - read); column = from & (writesize - 1); if (column + thislen > writesize) thislen = writesize - column; - while (!ret) { - /* If there is more to load then start next load */ - from += thislen; - if (read + thislen < len) { + while (!ret) { + /* If there is more to load then start next load */ + from += thislen; + if (read + thislen < len) { this->command(mtd, ONENAND_CMD_READ, from, writesize); - /* - * Chip boundary handling in DDP - * Now we issued chip 1 read and pointed chip 1 + /* + * Chip boundary handling in DDP + * Now we issued chip 1 read and pointed chip 1 * bufferram so we have to point chip 0 bufferram. 
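The OneNAND read-while-load scheme re-indented below, reduced to a runnable loop: kick off the next device load, drain the previous bufferRAM while it runs, then wait. The helpers are stubs standing in for the command/wait/read_bufferram hooks.

#include <stdio.h>

static void start_load(int chunk) { printf("load %d issued\n", chunk); }
static void wait_load(void)       { puts("load complete"); }
static void copy_out(int chunk)   { printf("chunk %d copied out\n", chunk); }

int main(void)
{
	const int nchunks = 4;

	start_load(0);
	wait_load();
	for (int i = 0; i < nchunks; i++) {
		if (i + 1 < nchunks)
			start_load(i + 1);	/* device loads ahead... */
		copy_out(i);			/* ...while we drain buffer i */
		if (i + 1 < nchunks)
			wait_load();
	}
	return 0;
}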
- */ - if (ONENAND_IS_DDP(this) && - unlikely(from == (this->chipsize >> 1))) { - this->write_word(ONENAND_DDP_CHIP0, this->base + ONENAND_REG_START_ADDRESS2); - boundary = 1; - } else - boundary = 0; - ONENAND_SET_PREV_BUFFERRAM(this); - } - /* While load is going, read from last bufferRAM */ - this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen); + */ + if (ONENAND_IS_DDP(this) && + unlikely(from == (this->chipsize >> 1))) { + this->write_word(ONENAND_DDP_CHIP0, this->base + ONENAND_REG_START_ADDRESS2); + boundary = 1; + } else + boundary = 0; + ONENAND_SET_PREV_BUFFERRAM(this); + } + /* While load is going, read from last bufferRAM */ + this->read_bufferram(mtd, ONENAND_DATARAM, buf, column, thislen); /* Read oob area if needed */ if (oobbuf) { @@ -1301,24 +1301,24 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from, oobcolumn = 0; } - /* See if we are done */ - read += thislen; - if (read == len) - break; - /* Set up for next read from bufferRAM */ - if (unlikely(boundary)) - this->write_word(ONENAND_DDP_CHIP1, this->base + ONENAND_REG_START_ADDRESS2); - ONENAND_SET_NEXT_BUFFERRAM(this); - buf += thislen; + /* See if we are done */ + read += thislen; + if (read == len) + break; + /* Set up for next read from bufferRAM */ + if (unlikely(boundary)) + this->write_word(ONENAND_DDP_CHIP1, this->base + ONENAND_REG_START_ADDRESS2); + ONENAND_SET_NEXT_BUFFERRAM(this); + buf += thislen; thislen = min_t(int, writesize, len - read); - column = 0; - cond_resched(); - /* Now wait for load */ - ret = this->wait(mtd, FL_READING); - onenand_update_bufferram(mtd, from, !ret); + column = 0; + cond_resched(); + /* Now wait for load */ + ret = this->wait(mtd, FL_READING); + onenand_update_bufferram(mtd, from, !ret); if (mtd_is_eccerr(ret)) ret = 0; - } + } /* * Return success, if no ECC failures, else -EBADMSG diff --git a/drivers/mtd/parsers/sharpslpart.c b/drivers/mtd/parsers/sharpslpart.c index e5ea6127ab5a..671a61845bd5 100644 --- a/drivers/mtd/parsers/sharpslpart.c +++ b/drivers/mtd/parsers/sharpslpart.c @@ -165,10 +165,10 @@ static int sharpsl_nand_get_logical_num(u8 *oob) static int sharpsl_nand_init_ftl(struct mtd_info *mtd, struct sharpsl_ftl *ftl) { - unsigned int block_num, log_num, phymax; + unsigned int block_num, phymax; + int i, ret, log_num; loff_t block_adr; u8 *oob; - int i, ret; oob = kzalloc(mtd->oobsize, GFP_KERNEL); if (!oob) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index b0cd443dd758..70e55fe61b99 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -2461,15 +2461,16 @@ static const struct flash_info spi_nor_ids[] = { { "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "n25q256ax1", INFO(0x20bb19, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_QUAD_READ) }, { "n25q512ax3", INFO(0x20ba20, 0, 64 * 1024, 1024, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ) }, + { "mt25qu512a", INFO6(0x20bb20, 0x104400, 64 * 1024, 1024, + SECT_4K | USE_FSR | SPI_NOR_DUAL_READ | + SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, + { "n25q512a", INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K | + SPI_NOR_QUAD_READ) }, { "n25q00", INFO(0x20ba21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, { "n25q00a", INFO(0x20bb21, 0, 64 * 1024, 2048, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, { "mt25ql02g", INFO(0x20ba22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, - { "mt25qu512a (n25q512a)", INFO(0x20bb20, 0, 64 * 1024, 
1024, - SECT_4K | USE_FSR | SPI_NOR_DUAL_READ | - SPI_NOR_QUAD_READ | - SPI_NOR_4B_OPCODES) }, { "mt25qu02g", INFO(0x20bb22, 0, 64 * 1024, 4096, SECT_4K | USE_FSR | SPI_NOR_QUAD_READ | NO_CHIP_ERASE) }, /* Micron */ diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 1c7be4eb3ba6..53f448e7433a 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -64,7 +64,7 @@ static int self_check_seen(struct ubi_device *ubi, unsigned long *seen) return 0; for (pnum = 0; pnum < ubi->peb_count; pnum++) { - if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { + if (!test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); ret = -EINVAL; } @@ -1137,7 +1137,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, struct rb_node *tmp_rb; int ret, i, j, free_peb_count, used_peb_count, vol_count; int scrub_peb_count, erase_peb_count; - unsigned long *seen_pebs = NULL; + unsigned long *seen_pebs; fm_raw = ubi->fm_buf; memset(ubi->fm_buf, 0, ubi->fm_size); @@ -1151,7 +1151,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, dvbuf = new_fm_vbuf(ubi, UBI_FM_DATA_VOLUME_ID); if (!dvbuf) { ret = -ENOMEM; - goto out_kfree; + goto out_free_avbuf; } avhdr = ubi_get_vid_hdr(avbuf); @@ -1160,7 +1160,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, seen_pebs = init_seen(ubi); if (IS_ERR(seen_pebs)) { ret = PTR_ERR(seen_pebs); - goto out_kfree; + goto out_free_dvbuf; } spin_lock(&ubi->volumes_lock); @@ -1328,7 +1328,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avbuf); if (ret) { ubi_err(ubi, "unable to write vid_hdr to fastmap SB!"); - goto out_kfree; + goto out_free_seen; } for (i = 0; i < new_fm->used_blocks; i++) { @@ -1350,7 +1350,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write vid_hdr to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1360,7 +1360,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write fastmap to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1370,10 +1370,13 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = self_check_seen(ubi, seen_pebs); dbg_bld("fastmap written!"); -out_kfree: - ubi_free_vid_buf(avbuf); - ubi_free_vid_buf(dvbuf); +out_free_seen: free_seen(seen_pebs); +out_free_dvbuf: + ubi_free_vid_buf(dvbuf); +out_free_avbuf: + ubi_free_vid_buf(avbuf); + out: return ret; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d02f12a5254e..01e2657e4c26 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -71,6 +71,49 @@ config DUMMY To compile this driver as a module, choose M here: the module will be called dummy. 
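The UBI fastmap self-check fix above flips the test polarity: an in-use PEB is an error only when its "seen" bit is clear. The corrected invariant as a runnable check:

#include <stdbool.h>
#include <stdio.h>

#define NPEB 8

int main(void)
{
	bool seen[NPEB]   = { [0] = true, [1] = true };	/* fastmap saw 0 and 1 */
	bool in_use[NPEB] = { [0] = true, [2] = true };	/* 2 is in use, unseen */

	for (int pnum = 0; pnum < NPEB; pnum++)
		if (!seen[pnum] && in_use[pnum])
			printf("self-check failed for PEB %d\n", pnum);	/* PEB 2 */
	return 0;
}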
+config WIREGUARD + tristate "WireGuard secure network tunnel" + depends on NET && INET + depends on IPV6 || !IPV6 + select NET_UDP_TUNNEL + select DST_CACHE + select CRYPTO + select CRYPTO_LIB_CURVE25519 + select CRYPTO_LIB_CHACHA20POLY1305 + select CRYPTO_LIB_BLAKE2S + select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT + select CRYPTO_POLY1305_X86_64 if X86 && 64BIT + select CRYPTO_BLAKE2S_X86 if X86 && 64BIT + select CRYPTO_CURVE25519_X86 if X86 && 64BIT + select ARM_CRYPTO if ARM + select ARM64_CRYPTO if ARM64 + select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON + select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON + select CRYPTO_POLY1305_ARM if ARM + select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON + select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 + select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + help + WireGuard is a secure, fast, and easy to use replacement for IPSec + that uses modern cryptography and clever networking tricks. It's + designed to be fairly general purpose and abstract enough to fit most + use cases, while at the same time remaining extremely simple to + configure. See www.wireguard.com for more info. + + It's safe to say Y or M here, as the driver is very lightweight and + is only in use when an administrator chooses to add an interface. + +config WIREGUARD_DEBUG + bool "Debugging checks and verbose messages" + depends on WIREGUARD + help + This will write log messages for handshake and other events + that occur for a WireGuard interface. It will also perform some + extra validation checks and unit tests at various points. This is + only useful for debugging. + + Say N here unless you know what you're doing. + config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0d3ba056cda3..953b7c12f0b0 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/ obj-$(CONFIG_IPVLAN) += ipvlan/ obj-$(CONFIG_IPVTAP) += ipvlan/ obj-$(CONFIG_DUMMY) += dummy.o +obj-$(CONFIG_WIREGUARD) += wireguard/ obj-$(CONFIG_EQUALIZER) += eql.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACSEC) += macsec.o diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 4f2e6910c623..c81698550e5a 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -50,11 +50,6 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match); @@ -553,10 +548,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) spin_unlock(&bond->mode_lock); } -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -653,8 +649,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, 
sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). @@ -664,7 +664,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); @@ -676,7 +676,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. @@ -1383,26 +1383,31 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) bool do_tx_balance = true; u32 hash_index = 0; const u8 *hash_start = NULL; - struct ipv6hdr *ip6hdr; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); switch (ntohs(skb->protocol)) { case ETH_P_IP: { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; if (is_broadcast_ether_addr(eth_data->h_dest) || - iph->daddr == ip_bcast || - iph->protocol == IPPROTO_IGMP) { + !pskb_network_may_pull(skb, sizeof(*iph))) { + do_tx_balance = false; + break; + } + iph = ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); - } break; - case ETH_P_IPV6: + break; + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ @@ -1419,7 +1424,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - /* Additianally, DAD probes should not be tx-balanced as that + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { + do_tx_balance = false; + break; + } + /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working.
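The rule all of these bonding changes enforce, modelled in plain C: never cast a header out of a buffer until the byte count has been verified, which is what pskb_network_may_pull() guarantees before skb_network_header() is used. The truncated struct is a stand-in.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct arp_pkt { uint16_t hw_addr_space, prot_addr_space; };	/* stand-in */

static const struct arp_pkt *pull_arp(const uint8_t *buf, size_t len)
{
	if (len < sizeof(struct arp_pkt))
		return NULL;	/* the pskb_network_may_pull() failure case */
	return (const struct arp_pkt *)buf;
}

int main(void)
{
	uint8_t runt[2] = { 0 };

	if (!pull_arp(runt, sizeof(runt)))
		puts("runt frame rejected before any field access");
	return 0;
}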
@@ -1429,17 +1438,26 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) */ - hash_start = (char *)&(ipv6_hdr(skb)->daddr); - hash_size = sizeof(ipv6_hdr(skb)->daddr); + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); break; - case ETH_P_IPX: - if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { + } + case ETH_P_IPX: { + const struct ipxhdr *ipxhdr; + + if (!pskb_network_may_pull(skb, sizeof(*ipxhdr))) { + do_tx_balance = false; + break; + } + ipxhdr = (struct ipxhdr *)skb_network_header(skb); + + if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { /* something is wrong with this packet */ do_tx_balance = false; break; } - if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { + if (ipxhdr->ipx_type != IPX_TYPE_NCP) { /* The only protocol worth balancing in * this family since it has an "ARP" like * mechanism @@ -1448,9 +1466,11 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } + eth_data = eth_hdr(skb); hash_start = (char *)eth_data->h_dest; hash_size = ETH_ALEN; break; + } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 48d5ec770b94..d10805e5e623 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3526,6 +3526,47 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res, } } +#ifdef CONFIG_LOCKDEP +static int bond_get_lowest_level_rcu(struct net_device *dev) +{ + struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; + struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; + int cur = 0, max = 0; + + now = dev; + iter = &dev->adj_list.lower; + + while (1) { + next = NULL; + while (1) { + ldev = netdev_next_lower_dev_rcu(now, &iter); + if (!ldev) + break; + + next = ldev; + niter = &ldev->adj_list.lower; + dev_stack[cur] = now; + iter_stack[cur++] = iter; + if (max <= cur) + max = cur; + break; + } + + if (!next) { + if (!cur) + return max; + next = dev_stack[--cur]; + niter = iter_stack[cur]; + } + + now = next; + iter = niter; + } + + return max; +} +#endif + static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats) { @@ -3533,11 +3574,17 @@ static void bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 temp; struct list_head *iter; struct slave *slave; + int nest_level = 0; - spin_lock(&bond->stats_lock); - memcpy(stats, &bond->bond_stats, sizeof(*stats)); rcu_read_lock(); +#ifdef CONFIG_LOCKDEP + nest_level = bond_get_lowest_level_rcu(bond_dev); +#endif + + spin_lock_nested(&bond->stats_lock, nest_level); + memcpy(stats, &bond->bond_stats, sizeof(*stats)); + bond_for_each_slave_rcu(bond, slave, iter) { const struct rtnl_link_stats64 *new = dev_get_stats(slave->dev, &temp); @@ -3547,10 +3594,10 @@ static void bond_get_stats(struct net_device *bond_dev, /* save off the slave stats for the next run */ memcpy(&slave->slave_stats, new, sizeof(*new)); } - rcu_read_unlock(); memcpy(&bond->bond_stats, stats, sizeof(*stats)); spin_unlock(&bond->stats_lock); + rcu_read_unlock(); } static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd) @@ -3640,6 +3687,8 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd case BOND_RELEASE_OLD: case SIOCBONDRELEASE: res = bond_release(bond_dev, slave_dev); + if (!res) + netdev_update_lockdep_key(slave_dev); break; case BOND_SETHWADDR_OLD: case SIOCBONDSETHWADDR: diff --git a/drivers/net/bonding/bond_options.c 
b/drivers/net/bonding/bond_options.c index ddb3916d3506..215c10923289 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1398,6 +1398,8 @@ static int bond_option_slaves_set(struct bonding *bond, case '-': slave_dbg(bond->dev, dev, "Releasing interface\n"); ret = bond_release(bond->dev, dev); + if (!ret) + netdev_update_lockdep_key(dev); break; default: diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 6ee06a49fb4c..68834a2853c9 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -883,6 +883,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index edacacfc9365..38b16efda4a9 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -680,7 +680,7 @@ int b53_configure_vlan(struct dsa_switch *ds) b53_do_vlan_op(dev, VTA_CMD_CLEAR); } - b53_enable_vlan(dev, false, ds->vlan_filtering); + b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering); b53_for_each_port(dev, i) b53_write16(dev, B53_VLAN_PAGE, @@ -1353,6 +1353,9 @@ void b53_vlan_add(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); + if (vid == 0 && vid == b53_default_pvid(dev)) + untagged = true; + vl->members |= BIT(port); if (untagged && !dsa_is_cpu_port(ds, port)) vl->untag |= BIT(port); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3e8635311d0d..b0f5280a83cb 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -68,7 +68,8 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) /* Force link status for IMP port */ reg = core_readl(priv, offset); - reg |= (MII_SW_OR | LINK_STS | GMII_SPEED_UP_2G); + reg |= (MII_SW_OR | LINK_STS); + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset); /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c index c5f64959a184..1142768969c2 100644 --- a/drivers/net/dsa/microchip/ksz9477_spi.c +++ b/drivers/net/dsa/microchip/ksz9477_spi.c @@ -101,6 +101,12 @@ static struct spi_driver ksz9477_spi_driver = { module_spi_driver(ksz9477_spi_driver); +MODULE_ALIAS("spi:ksz9477"); +MODULE_ALIAS("spi:ksz9897"); +MODULE_ALIAS("spi:ksz9893"); +MODULE_ALIAS("spi:ksz9563"); +MODULE_ALIAS("spi:ksz8563"); +MODULE_ALIAS("spi:ksz9567"); MODULE_AUTHOR("Woojung Huh "); MODULE_DESCRIPTION("Microchip KSZ9477 Series Switch SPI access Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3bd988529178..5f88effeb17a 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2762,6 +2762,8 @@ static u64 mv88e6xxx_devlink_atu_bin_get(struct mv88e6xxx_chip *chip, goto unlock; } + occupancy &= MV88E6XXX_G2_ATU_STATS_MASK; + unlock: mv88e6xxx_reg_unlock(chip); diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index b016cc205f81..ca3a7a7a73c3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -278,13 +278,13 @@ int mv88e6095_g1_set_egress_port(struct mv88e6xxx_chip *chip, switch (direction) { case MV88E6XXX_EGRESS_DIR_INGRESS: dest_port_chip = 
&chip->ingress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; + reg &= ~MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK; reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_INGRESS_DEST_MASK); break; case MV88E6XXX_EGRESS_DIR_EGRESS: dest_port_chip = &chip->egress_dest_port; - reg &= MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; + reg &= ~MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK; reg |= port << __bf_shf(MV88E6185_G1_MONITOR_CTL_EGRESS_DEST_MASK); break; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 87bfe7c8c9cd..246751b4f3b5 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -1096,6 +1096,13 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) { int err, irq, virq; + chip->g2_irq.masked = ~0; + mv88e6xxx_reg_lock(chip); + err = mv88e6xxx_g2_int_mask(chip, ~chip->g2_irq.masked); + mv88e6xxx_reg_unlock(chip); + if (err) + return err; + chip->g2_irq.domain = irq_domain_add_simple( chip->dev->of_node, 16, 0, &mv88e6xxx_g2_irq_domain_ops, chip); if (!chip->g2_irq.domain) @@ -1105,7 +1112,6 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip) irq_create_mapping(chip->g2_irq.domain, irq); chip->g2_irq.chip = mv88e6xxx_g2_irq_chip; - chip->g2_irq.masked = ~0; chip->device_irq = irq_find_mapping(chip->g1_irq.domain, MV88E6XXX_G1_STS_IRQ_DEVICE); diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index ea62604fdf8c..1fb58f9ad80b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -200,6 +200,11 @@ static void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); @@ -1041,9 +1046,41 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev, feature_ver); } +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev) +{ + return ena_dev->rss.hash_func; +} + +static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +{ + struct ena_admin_feature_rss_flow_hash_control *hash_key = + (ena_dev->rss).hash_key; + + netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); + /* The key is stored in the device in u32 array + * as well as the API requires the key to be passed in this + * format. 
Thus the size of our array should be divided by 4 + */ + hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_flow_hash_control *hash_key; + struct ena_admin_get_feat_resp get_resp; + int rc; + + hash_key = (ena_dev->rss).hash_key; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + ena_dev->rss.hash_key_dma_addr, + sizeof(ena_dev->rss.hash_key), 0); + if (unlikely(rc)) { + hash_key = NULL; + return -EOPNOTSUPP; + } rss->hash_key = dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), @@ -1254,30 +1291,6 @@ static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) return 0; } -static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev) -{ - u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 }; - struct ena_rss *rss = &ena_dev->rss; - u8 idx; - u16 i; - - for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++) - dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i; - - for (i = 0; i < 1 << rss->tbl_log_size; i++) { - if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES) - return -EINVAL; - idx = (u8)rss->rss_ind_tbl[i].cq_idx; - - if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES) - return -EINVAL; - - rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx]; - } - - return 0; -} - static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, u16 intr_delay_resolution) { @@ -2297,15 +2310,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, switch (func) { case ENA_ADMIN_TOEPLITZ: - if (key_len > sizeof(hash_key->key)) { - pr_err("key len (%hu) is bigger than the max supported (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; + if (key) { + if (key_len != sizeof(hash_key->key)) { + pr_err("key len (%hu) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; } - - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; break; case ENA_ADMIN_CRC32: rss->hash_init_val = init_val; @@ -2342,7 +2356,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - rss->hash_func = get_resp.u.flow_hash_func.selected_func; + /* ffs() returns 1 in case the lsb is set */ + rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func); + if (rss->hash_func) + rss->hash_func--; + if (func) *func = rss->hash_func; @@ -2606,10 +2624,6 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) if (!ind_tbl) return 0; - rc = ena_com_ind_tbl_convert_from_device(ena_dev); - if (unlikely(rc)) - return rc; - for (i = 0; i < (1 << rss->tbl_log_size); i++) ind_tbl[i] = rss->host_rss_ind_tbl[i]; @@ -2626,9 +2640,15 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) if (unlikely(rc)) goto err_indr_tbl; + /* The following function might return unsupported in case the + * device doesn't support setting the key / hash function. We can safely + * ignore this error and have indirection table support only. 
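As an aside on the keys_num computation in ena_com_hash_key_fill_default_key() above: the RSS key travels to the device as an array of u32 words, so a 40-byte Toeplitz key (the driver's ENA_HASH_KEY_SIZE, assumed here) is reported as 10 words. A plain C sketch of the same arithmetic:

	#include <stdio.h>
	#include <stdint.h>

	struct hash_key_example {
		uint8_t key[40];	/* assumed ENA_HASH_KEY_SIZE */
		uint32_t keys_num;	/* key length in u32 words */
	};

	int main(void)
	{
		struct hash_key_example hk;

		hk.keys_num = sizeof(hk.key) / sizeof(uint32_t);
		printf("%u\n", (unsigned int)hk.keys_num);	/* 40 / 4 = 10 */
		return 0;
	}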
+ */ rc = ena_com_hash_key_allocate(ena_dev); - if (unlikely(rc)) + if (unlikely(rc) && rc != -EOPNOTSUPP) goto err_hash_key; + else if (rc != -EOPNOTSUPP) + ena_com_hash_key_fill_default_key(ena_dev); rc = ena_com_hash_ctrl_init(ena_dev); if (unlikely(rc)) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 0ce37d54ed10..469f298199a7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "ena_common_defs.h" #include "ena_admin_defs.h" @@ -655,6 +656,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); */ void ena_com_rss_destroy(struct ena_com_dev *ena_dev); +/* ena_com_get_current_hash_function - Get RSS hash function + * @ena_dev: ENA communication layer struct + * + * Return the current hash function. + * @return: 0 or one of the ena_admin_hash_functions values. + */ +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev); + /* ena_com_fill_hash_function - Fill RSS hash function * @ena_dev: ENA communication layer struct * @func: The hash function (Toeplitz or crc) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index fc96c66b44cb..971f02ea55a1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -636,6 +636,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev) return ENA_HASH_KEY_SIZE; } +static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int i, rc; + + if (!indir) + return 0; + + rc = ena_com_indirect_table_get(ena_dev, indir); + if (rc) + return rc; + + /* Our internal representation of the indices is: even indices + * for Tx and uneven indices for Rx. We need to convert the Rx + * indices to be consecutive + */ + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) + indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]); + + return rc; +} + static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -644,11 +666,25 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 func; int rc; - rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + rc = ena_indirection_table_get(adapter, indir); if (rc) return rc; + /* We call this function in order to check if the device + * supports getting/setting the hash function. 
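For the ffs() conversion in ena_com_get_hash_function() above: the device reports the selected hash function as a one-hot bitmask (BIT(n) for enum value n) while the driver stores n itself, and ffs() is 1-based, hence the guarded decrement. A userspace sketch of the mapping:

	#include <stdio.h>
	#include <strings.h>	/* ffs() */

	int main(void)
	{
		unsigned int selected = 1u << 2;	/* device says "function 2" */
		int func = ffs((int)selected);		/* -> 3, 1-based */

		if (func)
			func--;				/* -> 2, the enum value */
		printf("%d\n", func);
		return 0;
	}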
+ */ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + + if (rc) { + if (rc == -EOPNOTSUPP) { + key = NULL; + hfunc = NULL; + rc = 0; + } + + return rc; + } + if (rc) return rc; @@ -657,7 +693,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, func = ETH_RSS_HASH_TOP; break; case ENA_ADMIN_CRC32: - func = ETH_RSS_HASH_XOR; + func = ETH_RSS_HASH_CRC32; break; default: netif_err(adapter, drv, netdev, @@ -700,10 +736,13 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, } switch (hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + func = ena_com_get_current_hash_function(ena_dev); + break; case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; - case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_CRC32: func = ENA_ADMIN_CRC32; break; default: @@ -812,6 +851,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .set_channels = ena_set_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, + .get_ts_info = ethtool_op_get_ts_info, }; void ena_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 948583fdcc28..1c1a41bd11da 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3049,8 +3049,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) return; - keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + - adapter->keep_alive_timeout); + keep_alive_expired = adapter->last_keep_alive_jiffies + + adapter->keep_alive_timeout; if (unlikely(time_is_before_jiffies(keep_alive_expired))) { netif_err(adapter, drv, adapter->netdev, "Keep alive watchdog timeout.\n"); @@ -3152,7 +3152,7 @@ static void ena_timer_service(struct timer_list *t) } /* Reset the timer */ - mod_timer(&adapter->timer_service, jiffies + HZ); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } static int ena_calc_max_io_queue_num(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index bffd778f2ce3..2fe5eeea6b69 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -129,6 +129,8 @@ #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) +#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) +#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d8612131c55e..cc8031ae9aa3 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2020,7 +2020,7 @@ static int xgene_enet_probe(struct platform_device *pdev) int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), - XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); + XGENE_NUM_TX_RING, XGENE_NUM_RX_RING); if (!ndev) return -ENOMEM; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index a1f99bef4a68..7b55633d2cb9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -722,6 +722,11 @@ static int aq_ethtool_set_priv_flags(struct net_device *ndev, u32 flags) if (flags & ~AQ_PRIV_FLAGS_MASK) return -EOPNOTSUPP; + if 
(hweight32((flags | priv_flags) & AQ_HW_LOOPBACK_MASK) > 1) { + netdev_info(ndev, "Can't enable more than one loopback simultaneously\n"); + return -EINVAL; + } + cfg->priv_flags = flags; if ((priv_flags ^ flags) & BIT(AQ_HW_LOOPBACK_DMA_NET)) { diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 6102251bb909..03ff92bc4a7f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -163,7 +163,7 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic, } if ((aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) && - (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci), + (!test_bit(be16_to_cpu(fsp->h_ext.vlan_tci) & VLAN_VID_MASK, aq_nic->active_vlans))) { netdev_err(aq_nic->ndev, "ethtool: unknown vlan-id specified"); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index cc70c606b6ef..251767c31f7e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -337,6 +337,8 @@ struct aq_fw_ops { void (*enable_ptp)(struct aq_hw_s *self, int enable); + void (*adjust_ptp)(struct aq_hw_s *self, uint64_t adj); + int (*set_eee_rate)(struct aq_hw_s *self, u32 speed); int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index c85e3e29012c..e95f6a6bef73 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -533,8 +533,10 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; @@ -655,10 +657,6 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) if (likely(frags)) { err = self->aq_hw_ops->hw_ring_tx_xmit(self->aq_hw, ring, frags); - if (err >= 0) { - ++ring->stats.tx.packets; - ring->stats.tx.bytes += skb->len; - } } else { err = NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 2bb329606794..f74952674084 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -359,7 +359,8 @@ static int aq_suspend_common(struct device *dev, bool deep) netif_device_detach(nic->ndev); netif_tx_stop_all_queues(nic->ndev); - aq_nic_stop(nic); + if (netif_running(nic->ndev)) + aq_nic_stop(nic); if (deep) { aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol); @@ -375,7 +376,7 @@ static int atl_resume_common(struct device *dev, bool deep) { struct pci_dev *pdev = to_pci_dev(dev); struct aq_nic_s *nic; - int ret; + int ret = 0; nic = pci_get_drvdata(pdev); @@ -390,9 +391,11 @@ static int atl_resume_common(struct device *dev, bool deep) goto err_exit; } - ret = aq_nic_start(nic); - if (ret) - goto err_exit; + if (netif_running(nic->ndev)) { + ret = aq_nic_start(nic); + if (ret) + goto err_exit; + } netif_device_attach(nic->ndev); netif_tx_start_all_queues(nic->ndev); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 951d86f8b66e..bae95a618560 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ 
b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -272,9 +272,12 @@ bool aq_ring_tx_clean(struct aq_ring_s *self) } } - if (unlikely(buff->is_eop)) - dev_kfree_skb_any(buff->skb); + if (unlikely(buff->is_eop)) { + ++self->stats.rx.packets; + self->stats.tx.bytes += buff->skb->len; + dev_kfree_skb_any(buff->skb); + } buff->pa = 0U; buff->eop_index = 0xffffU; self->sw_head = aq_ring_next_dx(self, self->sw_head); @@ -351,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, err = 0; goto err_exit; } - if (buff->is_error || buff->is_cso_err) { + if (buff->is_error || + (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { next_ = buff_->next, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 991e4d31b094..2c96f20f6289 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -78,7 +78,8 @@ struct __packed aq_ring_buff_s { u32 is_cleaned:1; u32 is_error:1; u32 is_vlan:1; - u32 rsvd3:4; + u32 is_lro:1; + u32 rsvd3:3; u16 eop_index; u16 rsvd4; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index ec041f78d063..d20d91cdece8 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -823,6 +823,8 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, } } + buff->is_lro = !!(HW_ATL_B0_RXD_WB_STAT2_RSCCNT & + rxd_wb->status); if (HW_ATL_B0_RXD_WB_STAT2_EOP & rxd_wb->status) { buff->len = rxd_wb->pkt_len % AQ_CFG_RX_FRAME_MAX; @@ -835,8 +837,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, rxd_wb->pkt_len > AQ_CFG_RX_FRAME_MAX ? AQ_CFG_RX_FRAME_MAX : rxd_wb->pkt_len; - if (HW_ATL_B0_RXD_WB_STAT2_RSCCNT & - rxd_wb->status) { + if (buff->is_lro) { /* LRO */ buff->next = rxd_wb->next_desc_ptr; ++ring->stats.rx.lro_packets; @@ -884,13 +885,16 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self, { struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; unsigned int i = 0U; + u32 vlan_promisc; + u32 l2_promisc; - hw_atl_rpfl2promiscuous_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC)); + l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) || + !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)); + vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc; - hw_atl_rpf_vlan_prom_mode_en_set(self, - IS_FILTER_ENABLED(IFF_PROMISC) || - cfg->is_vlan_force_promisc); + hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc); + + hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc); hw_atl_rpfl2multicast_flr_en_set(self, IS_FILTER_ENABLED(IFF_ALLMULTI) && @@ -1161,6 +1165,8 @@ static int hw_atl_b0_adj_sys_clock(struct aq_hw_s *self, s64 delta) { self->ptp_clk_offset += delta; + self->aq_fw_ops->adjust_ptp(self, self->ptp_clk_offset); + return 0; } @@ -1211,7 +1217,7 @@ static int hw_atl_b0_gpio_pulse(struct aq_hw_s *self, u32 index, fwreq.ptp_gpio_ctrl.index = index; fwreq.ptp_gpio_ctrl.period = period; /* Apply time offset */ - fwreq.ptp_gpio_ctrl.start = start - self->ptp_clk_offset; + fwreq.ptp_gpio_ctrl.start = start; size = sizeof(fwreq.msg_id) + sizeof(fwreq.ptp_gpio_ctrl); return self->aq_fw_ops->send_fw_request(self, &fwreq, size); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index f547baa6c954..354705f9bc49 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ 
b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -22,6 +22,7 @@ #define HW_ATL_MIF_ADDR 0x0208U #define HW_ATL_MIF_VAL 0x020CU +#define HW_ATL_MPI_RPC_ADDR 0x0334U #define HW_ATL_RPC_CONTROL_ADR 0x0338U #define HW_ATL_RPC_STATE_ADR 0x033CU @@ -53,15 +54,14 @@ enum mcp_area { }; static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual); - static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self, enum hal_atl_utils_fw_state_e state); - static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self); static u32 hw_atl_utils_mpi_get_state(struct aq_hw_s *self); static u32 hw_atl_utils_mif_cmd_get(struct aq_hw_s *self); static u32 hw_atl_utils_mif_addr_get(struct aq_hw_s *self); static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self); +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self); int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops) { @@ -476,6 +476,10 @@ static int hw_atl_utils_init_ucp(struct aq_hw_s *self, self, self->mbox_addr, self->mbox_addr != 0U, 1000U, 10000U); + err = readx_poll_timeout_atomic(aq_fw1x_rpc_get, self, + self->rpc_addr, + self->rpc_addr != 0U, + 1000U, 100000U); return err; } @@ -531,6 +535,12 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, self, fw.val, sw.tid == fw.tid, 1000U, 100000U); + if (err < 0) + goto err_exit; + + err = aq_hw_err_from_flags(self); + if (err < 0) + goto err_exit; if (fw.len == 0xFFFFU) { err = hw_atl_utils_fw_rpc_call(self, sw.len); @@ -1025,6 +1035,11 @@ static u32 hw_atl_utils_rpc_state_get(struct aq_hw_s *self) return aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR); } +static u32 aq_fw1x_rpc_get(struct aq_hw_s *self) +{ + return aq_hw_read_reg(self, HW_ATL_MPI_RPC_ADDR); +} + const struct aq_fw_ops aq_fw_1x_ops = { .init = hw_atl_utils_mpi_create, .deinit = hw_atl_fw1x_deinit, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index 97ebf849695f..77a4ed64830f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -30,6 +30,9 @@ #define HW_ATL_FW3X_EXT_CONTROL_ADDR 0x378 #define HW_ATL_FW3X_EXT_STATE_ADDR 0x37c +#define HW_ATL_FW3X_PTP_ADJ_LSW_ADDR 0x50a0 +#define HW_ATL_FW3X_PTP_ADJ_MSW_ADDR 0x50a4 + #define HW_ATL_FW2X_CAP_PAUSE BIT(CAPS_HI_PAUSE) #define HW_ATL_FW2X_CAP_ASYM_PAUSE BIT(CAPS_HI_ASYMMETRIC_PAUSE) #define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY) @@ -475,6 +478,14 @@ static void aq_fw3x_enable_ptp(struct aq_hw_s *self, int enable) aq_hw_write_reg(self, HW_ATL_FW3X_EXT_CONTROL_ADDR, ptp_opts); } +static void aq_fw3x_adjust_ptp(struct aq_hw_s *self, uint64_t adj) +{ + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_LSW_ADDR, + (adj >> 0) & 0xffffffff); + aq_hw_write_reg(self, HW_ATL_FW3X_PTP_ADJ_MSW_ADDR, + (adj >> 32) & 0xffffffff); +} + static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode) { if (self->fw_ver_actual < HW_ATL_FW_VER_LED) @@ -633,4 +644,5 @@ const struct aq_fw_ops aq_fw_2x_ops = { .enable_ptp = aq_fw3x_enable_ptp, .led_control = aq_fw2x_led_control, .set_phyloopback = aq_fw2x_set_phyloopback, + .adjust_ptp = aq_fw3x_adjust_ptp, }; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index d6b1a153f9df..5e322fe30b1d 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2135,7 +2135,7 @@ static int bcm_sysport_rule_set(struct bcm_sysport_priv 
*priv, return -ENOSPC; index = find_first_zero_bit(priv->filters, RXCHK_BRCM_TAG_MAX); - if (index > RXCHK_BRCM_TAG_MAX) + if (index >= RXCHK_BRCM_TAG_MAX) return -ENOSPC; /* Location is the classification ID, and index is the position @@ -2728,6 +2728,9 @@ static int __maybe_unused bcm_sysport_resume(struct device *d) umac_reset(priv); + /* Disable the UniMAC RX/TX */ + umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, 0); + /* We may have been suspended and never received a WOL event that * would turn off MPD detection, take care of that now */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e6f18f6070ef..05da27a64d55 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -944,6 +944,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, dma_addr -= bp->rx_dma_offset; dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); + page_pool_release_page(rxr->page_pool, page); if (unlikely(!payload)) payload = eth_get_headlen(bp->dev, data_ptr, len); @@ -7892,7 +7893,7 @@ static void bnxt_setup_msix(struct bnxt *bp) int tcs, i; tcs = netdev_get_num_tc(dev); - if (tcs > 1) { + if (tcs) { int i, off, count; for (i = 0; i < tcs; i++) { @@ -9240,6 +9241,17 @@ void bnxt_half_close_nic(struct bnxt *bp) bnxt_free_mem(bp, false); } +static void bnxt_reenable_sriov(struct bnxt *bp) +{ + if (BNXT_PF(bp)) { + struct bnxt_pf_info *pf = &bp->pf; + int n = pf->active_vfs; + + if (n) + bnxt_cfg_hw_sriov(bp, &n, true); + } +} + static int bnxt_open(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -9258,15 +9270,10 @@ static int bnxt_open(struct net_device *dev) bnxt_hwrm_if_change(bp, false); } else { if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { - if (BNXT_PF(bp)) { - struct bnxt_pf_info *pf = &bp->pf; - int n = pf->active_vfs; - - if (n) - bnxt_cfg_hw_sriov(bp, &n, true); - } - if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) { bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); + } } bnxt_hwmon_open(bp); } @@ -9306,10 +9313,6 @@ static void __bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bnxt_debug_dev_exit(bp); bnxt_disable_napi(bp); del_timer_sync(&bp->timer); - if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && - pci_is_enabled(bp->pdev)) - pci_disable_device(bp->pdev); - bnxt_free_skbs(bp); /* Save ring stats before shutdown */ @@ -10087,9 +10090,16 @@ static void bnxt_reset(struct bnxt *bp, bool silent) static void bnxt_fw_reset_close(struct bnxt *bp) { bnxt_ulp_stop(bp); + /* When firmware is fatal state, disable PCI device to prevent + * any potential bad DMAs before freeing kernel memory. 
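The ordering the surrounding bnxt_fw_reset_close() hunk enforces, reduced to a sketch (the struct and flag names here are hypothetical; only the sequence is the point): disable the PCI device to stop DMA while the firmware is known-fatal, free resources, then disable at most once more, since pci_is_enabled() reflects the enable refcount:

	#include <linux/bitops.h>
	#include <linux/pci.h>

	struct example_nic {
		struct pci_dev *pdev;
		unsigned long state;
	};
	#define EXAMPLE_FW_FATAL 0	/* hypothetical state bit */

	static void example_fw_reset_close(struct example_nic *nic)
	{
		if (test_bit(EXAMPLE_FW_FATAL, &nic->state))
			pci_disable_device(nic->pdev);	/* stop DMA first */

		/* ... close rings and free queue memory here ... */

		if (pci_is_enabled(nic->pdev))	/* skip if disabled above */
			pci_disable_device(nic->pdev);
	}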
+ */ + if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + pci_disable_device(bp->pdev); __bnxt_close_nic(bp, true, false); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); + if (pci_is_enabled(bp->pdev)) + pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; @@ -10822,6 +10832,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); bnxt_ulp_start(bp, rc); + if (!rc) + bnxt_reenable_sriov(bp); bnxt_dl_health_status_update(bp, true); rtnl_unlock(); break; @@ -10961,13 +10973,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) struct bnxt *bp = netdev_priv(dev); if (netif_running(dev)) - bnxt_close_nic(bp, false, false); + bnxt_close_nic(bp, true, false); dev->mtu = new_mtu; bnxt_set_ring_params(bp); if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + return bnxt_open_nic(bp, true, false); return 0; } @@ -11433,9 +11445,9 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_sriov_disable(bp); bnxt_dl_fw_reporters_destroy(bp, true); - bnxt_dl_unregister(bp); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); + bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); bnxt_cancel_sp_work(bp); bp->sp_event = 0; @@ -11763,6 +11775,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (version_printed++ == 0) pr_info("%s", version); + /* Clear any pending DMA transactions from crash kernel + * while loading driver in capture kernel. + */ + if (is_kdump_kernel()) { + pci_clear_master(pdev); + pcie_flr(pdev); + } + max_irqs = bnxt_get_max_irq(pdev); dev = alloc_etherdev_mq(sizeof(*bp), max_irqs); if (!dev) @@ -11905,11 +11925,14 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_init_tc(bp); } + bnxt_dl_register(bp); + rc = register_netdev(dev); if (rc) - goto init_err_cleanup_tc; + goto init_err_cleanup; - bnxt_dl_register(bp); + if (BNXT_PF(bp)) + devlink_port_type_eth_set(&bp->dl_port, bp->dev); bnxt_dl_fw_reporters_create(bp); netdev_info(dev, "%s found at mem %lx, node addr %pM\n", @@ -11919,7 +11942,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -init_err_cleanup_tc: +init_err_cleanup: + bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); bnxt_clear_int_mode(bp); @@ -11956,10 +11980,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) dev_close(dev); bnxt_ulp_shutdown(bp); + bnxt_clear_int_mode(bp); + pci_disable_device(pdev); if (system_state == SYSTEM_POWER_OFF) { - bnxt_clear_int_mode(bp); - pci_disable_device(pdev); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 3eedd4477218..8fcaa2398344 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -526,7 +526,6 @@ int bnxt_dl_register(struct bnxt *bp) netdev_err(bp->dev, "devlink_port_register failed"); goto err_dl_param_unreg; } - devlink_port_type_eth_set(&bp->dl_port, bp->dev); rc = devlink_port_params_register(&bp->dl_port, bnxt_dl_port_params, ARRAY_SIZE(bnxt_dl_port_params)); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 08d56ec7b68a..972383aefc2b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2007,8 +2007,8 @@ 
int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_nvm_install_update_input install = {0}; const struct firmware *fw; - int rc, hwrm_err = 0; u32 item_len; + int rc = 0; u16 index; bnxt_hwrm_fw_set_time(bp); @@ -2052,15 +2052,14 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, memcpy(kmem, fw->data, fw->size); modify.host_src_addr = cpu_to_le64(dma_handle); - hwrm_err = hwrm_send_message(bp, &modify, - sizeof(modify), - FLASH_PACKAGE_TIMEOUT); + rc = hwrm_send_message(bp, &modify, sizeof(modify), + FLASH_PACKAGE_TIMEOUT); dma_free_coherent(&bp->pdev->dev, fw->size, kmem, dma_handle); } } release_firmware(fw); - if (rc || hwrm_err) + if (rc) goto err_exit; if ((install_type & 0xffff) == 0) @@ -2069,20 +2068,19 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, install.install_type = cpu_to_le32(install_type); mutex_lock(&bp->hwrm_cmd_lock); - hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), - INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) { + rc = _hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (rc) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; if (resp->error_code && error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); - hwrm_err = _hwrm_send_message(bp, &install, - sizeof(install), - INSTALL_PACKAGE_TIMEOUT); + rc = _hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); } - if (hwrm_err) + if (rc) goto flash_pkg_exit; } @@ -2094,7 +2092,7 @@ int bnxt_flash_package_from_file(struct net_device *dev, const char *filename, flash_pkg_exit: mutex_unlock(&bp->hwrm_cmd_lock); err_exit: - if (hwrm_err == -EACCES) + if (rc == -EACCES) bnxt_print_admin_err(bp); return rc; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 6392a2530183..10244941a7a6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -294,6 +294,7 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); + reg &= ~ID_MODE_DIS; reg |= id_mode_dis; if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) reg |= RGMII_MODE_EN_V123; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 19fe4f4867c7..c16cc1cb5874 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -645,6 +645,7 @@ #define MACB_CAPS_GEM_HAS_PTP 0x00000040 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 #define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 +#define MACB_CAPS_MACB_IS_EMAC 0x08000000 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index f7d87c71aaa9..20db44d7cda8 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -73,7 +73,11 @@ struct sifive_fu540_macb_mgmt { /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) -#define GEM_MAX_TX_LEN ((unsigned int)((1 << 
GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) +/* Limit maximum TX length as per Cadence TSO errata. This is to avoid a + * false amba_error in TX path from the DMA assuming there is not enough + * space in the SRAM (16KB) even when there is. + */ +#define GEM_MAX_TX_LEN (unsigned int)(0x3FC0) #define GEM_MTU_MIN_SIZE ETH_MIN_MTU #define MACB_NETIF_LSO NETIF_F_TSO @@ -529,8 +533,21 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, old_ctrl = ctrl = macb_or_gem_readl(bp, NCFGR); /* Clear all the bits we might set later */ - ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE) | - GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + ctrl &= ~(MACB_BIT(SPD) | MACB_BIT(FD) | MACB_BIT(PAE)); + + if (bp->caps & MACB_CAPS_MACB_IS_EMAC) { + if (state->interface == PHY_INTERFACE_MODE_RMII) + ctrl |= MACB_BIT(RM9200_RMII); + } else { + ctrl &= ~(GEM_BIT(GBE) | GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + + /* We do not support MLO_PAUSE_RX yet */ + if (state->pause & MLO_PAUSE_TX) + ctrl |= MACB_BIT(PAE); + + if (state->interface == PHY_INTERFACE_MODE_SGMII) + ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); + } if (state->speed == SPEED_1000) ctrl |= GEM_BIT(GBE); @@ -540,13 +557,6 @@ static void macb_mac_config(struct phylink_config *config, unsigned int mode, if (state->duplex) ctrl |= MACB_BIT(FD); - /* We do not support MLO_PAUSE_RX yet */ - if (state->pause & MLO_PAUSE_TX) - ctrl |= MACB_BIT(PAE); - - if (state->interface == PHY_INTERFACE_MODE_SGMII) - ctrl |= GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL); - /* Apply the new configuration, if any */ if (old_ctrl ^ ctrl) macb_or_gem_writel(bp, NCFGR, ctrl); @@ -565,9 +575,10 @@ static void macb_mac_link_down(struct phylink_config *config, unsigned int mode, unsigned int q; u32 ctrl; - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) - queue_writel(queue, IDR, - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + queue_writel(queue, IDR, + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); /* Disable Rx and Tx */ ctrl = macb_readl(bp, NCR) & ~(MACB_BIT(RE) | MACB_BIT(TE)); @@ -584,17 +595,19 @@ static void macb_mac_link_up(struct phylink_config *config, unsigned int mode, struct macb_queue *queue; unsigned int q; - macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { + macb_set_tx_clk(bp->tx_clk, bp->speed, ndev); - /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down - * cleared the pipeline and control registers. - */ - bp->macbgem_ops.mog_init_rings(bp); - macb_init_buffers(bp); + /* Initialize rings & buffers as clearing MACB_BIT(TE) in link down + * cleared the pipeline and control registers. 
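A quick check of the numbers behind the GEM_MAX_TX_LEN change at the top of this file: with GEM_TX_FRMLEN_SIZE assumed to be 14 bits, the old bound came to ((1 << 14) - 1) & ~7 = 16376 bytes, while the errata cap 0x3FC0 = 16320 is 64-byte aligned and sits 64 bytes below the 16 KB SRAM named in the new comment:

	#include <assert.h>

	int main(void)
	{
		unsigned int old_max = ((1u << 14) - 1) & ~7u;	/* 16376 */
		unsigned int new_max = 0x3FC0;			/* 16320 */

		assert(old_max == 16376);
		assert(new_max == 16320 && new_max % 64 == 0);
		assert(16384 - new_max == 64);	/* headroom under 16 KB */
		return 0;
	}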
+ */ + bp->macbgem_ops.mog_init_rings(bp); + macb_init_buffers(bp); - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) - queue_writel(queue, IER, - bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) + queue_writel(queue, IER, + bp->rx_intr_mask | MACB_TX_INT_FLAGS | MACB_BIT(HRESP)); + } /* Enable Rx and Tx */ macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(RE) | MACB_BIT(TE)); @@ -1752,16 +1765,14 @@ static netdev_features_t macb_features_check(struct sk_buff *skb, /* Validate LSO compatibility */ - /* there is only one buffer */ - if (!skb_is_nonlinear(skb)) + /* there is only one buffer or protocol is not UDP */ + if (!skb_is_nonlinear(skb) || (ip_hdr(skb)->protocol != IPPROTO_UDP)) return features; /* length of header */ hdrlen = skb_transport_offset(skb); - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - hdrlen += tcp_hdrlen(skb); - /* For LSO: + /* For UFO only: * When software supplies two or more payload buffers all payload buffers * apart from the last must be a multiple of 8 bytes in size. */ @@ -3749,6 +3760,10 @@ static int at91ether_open(struct net_device *dev) u32 ctl; int ret; + ret = pm_runtime_get_sync(&lp->pdev->dev); + if (ret < 0) + return ret; + /* Clear internal statistics */ ctl = macb_readl(lp, NCR); macb_writel(lp, NCR, ctl | MACB_BIT(CLRSTAT)); @@ -3813,7 +3828,7 @@ static int at91ether_close(struct net_device *dev) q->rx_buffers, q->rx_buffers_dma); q->rx_buffers = NULL; - return 0; + return pm_runtime_put(&lp->pdev->dev); } /* Transmit packet */ @@ -3996,7 +4011,6 @@ static int at91ether_init(struct platform_device *pdev) struct net_device *dev = platform_get_drvdata(pdev); struct macb *bp = netdev_priv(dev); int err; - u32 reg; bp->queues[0].bp = bp; @@ -4010,11 +4024,7 @@ static int at91ether_init(struct platform_device *pdev) macb_writel(bp, NCR, 0); - reg = MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG); - if (bp->phy_interface == PHY_INTERFACE_MODE_RMII) - reg |= MACB_BIT(RM9200_RMII); - - macb_writel(bp, NCFGR, reg); + macb_writel(bp, NCFGR, MACB_BF(CLK, MACB_CLK_DIV32) | MACB_BIT(BIG)); return 0; } @@ -4173,7 +4183,7 @@ static const struct macb_config sama5d4_config = { }; static const struct macb_config emac_config = { - .caps = MACB_CAPS_NEEDS_RSTONUBR, + .caps = MACB_CAPS_NEEDS_RSTONUBR | MACB_CAPS_MACB_IS_EMAC, .clk_init = at91ether_clk_init, .init = at91ether_init, }; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index c4f6ec0cd183..00751771f662 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -410,10 +410,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -1535,6 +1544,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + 
+ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1550,7 +1601,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1604,6 +1655,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1620,6 +1673,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1637,6 +1691,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 25888706bdcd..cdea49392185 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -180,6 +180,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 0dedd3e9c31e..b11ba4565c20 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5351,12 +5351,11 @@ static inline bool is_x_10g_port(const struct link_config *lc) static int cfg_queues(struct adapter *adap) { u32 avail_qsets, avail_eth_qsets, avail_uld_qsets; + u32 i, n10g = 0, qidx = 0, n1g = 0; + u32 ncpus = num_online_cpus(); u32 niqflint, neq, num_ulds; struct 
sge *s = &adap->sge; - u32 i, n10g = 0, qidx = 0; -#ifndef CONFIG_CHELSIO_T4_DCB - int q10g = 0; -#endif + u32 q10g = 0, q1g; /* Reduce memory usage in kdump environment, disable all offload. */ if (is_kdump_kernel() || (is_uld(adap) && t4_uld_mem_alloc(adap))) { @@ -5394,44 +5393,50 @@ static int cfg_queues(struct adapter *adap) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); avail_eth_qsets = min_t(u32, avail_qsets, MAX_ETH_QSETS); + + /* We default to 1 queue per non-10G port and up to # of cores queues + * per 10G port. + */ + if (n10g) + q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g; + + n1g = adap->params.nports - n10g; #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging support we need to be able to support up * to 8 Traffic Priorities; each of which will be assigned to its * own TX Queue in order to prevent Head-Of-Line Blocking. */ + q1g = 8; if (adap->params.nports * 8 > avail_eth_qsets) { dev_err(adap->pdev_dev, "DCB avail_eth_qsets=%d < %d!\n", avail_eth_qsets, adap->params.nports * 8); return -ENOMEM; } - for_each_port(adap, i) { - struct port_info *pi = adap2pinfo(adap, i); + if (adap->params.nports * ncpus < avail_eth_qsets) + q10g = max(8U, ncpus); + else + q10g = max(8U, q10g); - pi->first_qset = qidx; - pi->nqsets = is_kdump_kernel() ? 1 : 8; - qidx += pi->nqsets; - } -#else /* !CONFIG_CHELSIO_T4_DCB */ - /* We default to 1 queue per non-10G port and up to # of cores queues - * per 10G port. - */ - if (n10g) - q10g = (avail_eth_qsets - (adap->params.nports - n10g)) / n10g; - if (q10g > netif_get_num_default_rss_queues()) - q10g = netif_get_num_default_rss_queues(); + while ((q10g * n10g) > (avail_eth_qsets - n1g * q1g)) + q10g--; - if (is_kdump_kernel()) +#else /* !CONFIG_CHELSIO_T4_DCB */ + q1g = 1; + q10g = min(q10g, ncpus); +#endif /* !CONFIG_CHELSIO_T4_DCB */ + if (is_kdump_kernel()) { q10g = 1; + q1g = 1; + } for_each_port(adap, i) { struct port_info *pi = adap2pinfo(adap, i); pi->first_qset = qidx; - pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1; + pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : q1g; qidx += pi->nqsets; } -#endif /* !CONFIG_CHELSIO_T4_DCB */ s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ @@ -5443,7 +5448,7 @@ static int cfg_queues(struct adapter *adap) * capped by the number of available cores. 
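A worked pass through the non-DCB branch of the queue budgeting above, with invented numbers: nports = 4, of which n10g = 2 are 10G, avail_eth_qsets = 32, ncpus = 8. Then q10g = (32 - (4 - 2)) / 2 = 15, clamped to min(15, 8) = 8, q1g = 1, and the ports consume 2*8 + 2*1 = 18 qsets, within budget:

	#include <assert.h>

	int main(void)
	{
		unsigned int nports = 4, n10g = 2, ncpus = 8;
		unsigned int avail_eth_qsets = 32;
		unsigned int n1g = nports - n10g;
		unsigned int q10g = (avail_eth_qsets - n1g) / n10g;	/* 15 */
		unsigned int q1g = 1;

		if (q10g > ncpus)	/* q10g = min(q10g, ncpus) */
			q10g = ncpus;

		assert(q10g == 8);
		assert(n10g * q10g + n1g * q1g == 18);
		assert(n10g * q10g + n1g * q1g <= avail_eth_qsets);
		return 0;
	}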
*/ num_ulds = adap->num_uld + adap->num_ofld_uld; - i = min_t(u32, MAX_OFLD_QSETS, num_online_cpus()); + i = min_t(u32, MAX_OFLD_QSETS, ncpus); avail_uld_qsets = roundup(i, adap->params.nports); if (avail_qsets < num_ulds * adap->params.nports) { adap->params.offload = 0; diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index acb2856936d2..6e2ab10ad2e6 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2013,10 +2013,10 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index cce90b5925d9..70060c51854f 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1405,6 +1405,8 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev) mac_addr = of_get_mac_address(np); if (!IS_ERR(mac_addr)) ether_addr_copy(pdata->dev_addr, mac_addr); + else if (PTR_ERR(mac_addr) == -EPROBE_DEFER) + return ERR_CAST(mac_addr); return pdata; } diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 0efdbd1a4a6f..32d470d4122a 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index b1f30b194300..117ffe08800d 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index a301f0095223..36e2e28fa6e3 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1,4 +1,5 @@ /* Copyright 2008 - 2016 Freescale Semiconductor Inc. + * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -123,7 +124,22 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); #define FSL_QMAN_MAX_OAL 127 /* Default alignment for start of data in an Rx FD */ +#ifdef CONFIG_DPAA_ERRATUM_A050385 +/* aligning data start to 64 avoids DMA transaction splits, unless the buffer + * is crossing a 4k page boundary + */ +#define DPAA_FD_DATA_ALIGNMENT (fman_has_errata_a050385() ? 
64 : 16) +/* aligning to 256 avoids DMA transaction splits caused by 4k page boundary + * crossings; also, all SG fragments except the last must have a size multiple + * of 256 to avoid DMA transaction splits + */ +#define DPAA_A050385_ALIGN 256 +#define DPAA_FD_RX_DATA_ALIGNMENT (fman_has_errata_a050385() ? \ + DPAA_A050385_ALIGN : 16) +#else #define DPAA_FD_DATA_ALIGNMENT 16 +#define DPAA_FD_RX_DATA_ALIGNMENT DPAA_FD_DATA_ALIGNMENT +#endif /* The DPAA requires 256 bytes reserved and mapped for the SGT */ #define DPAA_SGT_SIZE 256 @@ -158,8 +174,13 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); #define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result) #define DPAA_TIME_STAMP_SIZE 8 #define DPAA_HASH_RESULTS_SIZE 8 +#ifdef CONFIG_DPAA_ERRATUM_A050385 +#define DPAA_RX_PRIV_DATA_SIZE (DPAA_A050385_ALIGN - (DPAA_PARSE_RESULTS_SIZE\ + + DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE)) +#else #define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \ dpaa_rx_extra_headroom) +#endif #define DPAA_ETH_PCD_RXQ_NUM 128 @@ -180,7 +201,12 @@ static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS]; #define DPAA_BP_RAW_SIZE 4096 +#ifdef CONFIG_DPAA_ERRATUM_A050385 +#define dpaa_bp_size(raw_size) (SKB_WITH_OVERHEAD(raw_size) & \ + ~(DPAA_A050385_ALIGN - 1)) +#else #define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size) +#endif static int dpaa_max_frm; @@ -1192,7 +1218,7 @@ static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp, buf_prefix_content.pass_prs_result = true; buf_prefix_content.pass_hash_result = true; buf_prefix_content.pass_time_stamp = true; - buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT; + buf_prefix_content.data_align = DPAA_FD_RX_DATA_ALIGNMENT; rx_p = ¶ms.specific_params.rx_params; rx_p->err_fqid = errq->fqid; @@ -1662,6 +1688,8 @@ static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd) return CHECKSUM_NONE; } +#define PTR_IS_ALIGNED(x, a) (IS_ALIGNED((unsigned long)(x), (a))) + /* Build a linear skb around the received buffer. * We are guaranteed there is enough room at the end of the data buffer to * accommodate the shared info area of the skb. 
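The buffer-pool sizing above reads as: take the raw 4 KB buffer, subtract the skb_shared_info overhead, then round down to a 256-byte multiple so the controller never has to split a DMA transaction for a fragment. A standalone sketch of the rounding step (the overhead value below is illustrative; it varies by platform):

	#include <assert.h>
	#include <stddef.h>

	#define A050385_ALIGN 256u

	/* mirror of dpaa_bp_size(): mask off the low 8 bits */
	static size_t bp_size_a050385(size_t usable)
	{
		return usable & ~(size_t)(A050385_ALIGN - 1);
	}

	int main(void)
	{
		size_t usable = 4096 - 320;	/* raw minus assumed overhead */

		assert(bp_size_a050385(usable) == 3584);
		assert(bp_size_a050385(usable) % A050385_ALIGN == 0);
		return 0;
	}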
@@ -1733,8 +1761,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
 		sg_addr = qm_sg_addr(&sgt[i]);
 		sg_vaddr = phys_to_virt(sg_addr);
-		WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
-				    SMP_CACHE_BYTES));
+		WARN_ON(!PTR_IS_ALIGNED(sg_vaddr, SMP_CACHE_BYTES));
 
 		dma_unmap_page(priv->rx_dma_dev, sg_addr,
 			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
@@ -2022,6 +2049,75 @@ static inline int dpaa_xmit(struct dpaa_priv *priv,
 	return 0;
 }
 
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+int dpaa_a050385_wa(struct net_device *net_dev, struct sk_buff **s)
+{
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct sk_buff *new_skb, *skb = *s;
+	unsigned char *start, i;
+
+	/* check linear buffer alignment */
+	if (!PTR_IS_ALIGNED(skb->data, DPAA_A050385_ALIGN))
+		goto workaround;
+
+	/* linear buffers just need to have an aligned start */
+	if (!skb_is_nonlinear(skb))
+		return 0;
+
+	/* linear data size for nonlinear skbs needs to be aligned */
+	if (!IS_ALIGNED(skb_headlen(skb), DPAA_A050385_ALIGN))
+		goto workaround;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		/* all fragments need to have aligned start addresses */
+		if (!IS_ALIGNED(skb_frag_off(frag), DPAA_A050385_ALIGN))
+			goto workaround;
+
+		/* all but last fragment need to have aligned sizes */
+		if (!IS_ALIGNED(skb_frag_size(frag), DPAA_A050385_ALIGN) &&
+		    (i < skb_shinfo(skb)->nr_frags - 1))
+			goto workaround;
+	}
+
+	return 0;
+
+workaround:
+	/* copy all the skb content into a new linear buffer */
+	new_skb = netdev_alloc_skb(net_dev, skb->len + DPAA_A050385_ALIGN - 1 +
+				   priv->tx_headroom);
+	if (!new_skb)
+		return -ENOMEM;
+
+	/* NET_SKB_PAD bytes already reserved, adding up to tx_headroom */
+	skb_reserve(new_skb, priv->tx_headroom - NET_SKB_PAD);
+
+	/* Workaround for DPAA_A050385 requires data start to be aligned */
+	start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN);
+	if (start - new_skb->data != 0)
+		skb_reserve(new_skb, start - new_skb->data);
+
+	skb_put(new_skb, skb->len);
+	skb_copy_bits(skb, 0, new_skb->data, skb->len);
+	skb_copy_header(new_skb, skb);
+	new_skb->dev = skb->dev;
+
+	/* We move the headroom when we align it so we have to reset the
+	 * network and transport header offsets relative to the new data
+	 * pointer. The checksum offload relies on these offsets.
+	 */
+	skb_set_network_header(new_skb, skb_network_offset(skb));
+	skb_set_transport_header(new_skb, skb_transport_offset(skb));
+
+	/* TODO: does timestamping need the result in the old skb?
+	 */
+	dev_kfree_skb(skb);
+	*s = new_skb;
+
+	return 0;
+}
+#endif
+
 static netdev_tx_t
 dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 {
@@ -2068,6 +2164,14 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 		nonlinear = skb_is_nonlinear(skb);
 	}
 
+#ifdef CONFIG_DPAA_ERRATUM_A050385
+	if (unlikely(fman_has_errata_a050385())) {
+		if (dpaa_a050385_wa(net_dev, &skb))
+			goto enomem;
+		nonlinear = skb_is_nonlinear(skb);
+	}
+#endif
+
 	if (nonlinear) {
 		/* Just create a S/G fd based on the skb */
 		err = skb_to_sg_fd(priv, skb, &fd);
@@ -2453,6 +2557,9 @@ static void dpaa_adjust_link(struct net_device *net_dev)
 	mac_dev->adjust_link(mac_dev);
 }
 
+/* The Aquantia PHYs are capable of performing rate adaptation */
+#define PHY_VEND_AQUANTIA	0x03a1b400
+
 static int dpaa_phy_init(struct net_device *net_dev)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
@@ -2471,9 +2578,14 @@ static int dpaa_phy_init(struct net_device *net_dev)
 		return -ENODEV;
 	}
 
-	/* Remove any features not supported by the controller */
-	ethtool_convert_legacy_u32_to_link_mode(mask, mac_dev->if_support);
-	linkmode_and(phy_dev->supported, phy_dev->supported, mask);
+	/* Unless the PHY is capable of rate adaptation */
+	if (mac_dev->phy_if != PHY_INTERFACE_MODE_XGMII ||
+	    ((phy_dev->drv->phy_id & GENMASK(31, 10)) != PHY_VEND_AQUANTIA)) {
+		/* remove any features not supported by the controller */
+		ethtool_convert_legacy_u32_to_link_mode(mask,
+							mac_dev->if_support);
+		linkmode_and(phy_dev->supported, phy_dev->supported, mask);
+	}
 
 	phy_support_asym_pause(phy_dev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 2e99438cb1bf..de52686b1d46 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -36,7 +36,6 @@ void enetc_sched_speed_set(struct net_device *ndev)
 	case SPEED_10:
 	default:
 		pspeed = ENETC_PMR_PSPEED_10M;
-		netdev_err(ndev, "Qbv PSPEED set speed link down.\n");
 	}
 
 	priv->speed = speed;
@@ -192,7 +191,6 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
 	u32 hi_credit_bit, hi_credit_reg;
 	u32 max_interference_size;
 	u32 port_frame_max_size;
-	u32 tc_max_sized_frame;
 	u8 tc = cbs->queue;
 	u8 prio_top, prio_next;
 	int bw_sum = 0;
@@ -250,7 +248,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
 		return -EINVAL;
 	}
 
-	tc_max_sized_frame = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc));
+	enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc));
 
 	/* For top prio TC, the max_interfrence_size is maxSizedFrame.
* diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9294027e9d90..ab028f954ac5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2529,15 +2529,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs); if (cycle > 0xFFFF) { dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs); if (cycle > 0xFFFF) { - dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); + dev_err(dev, "Tx coalesced usec exceed hardware limitation\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 0139cb9042ec..34150182cc35 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -8,3 +8,31 @@ config FSL_FMAN help Freescale Data-Path Acceleration Architecture Frame Manager (FMan) support + +config DPAA_ERRATUM_A050385 + bool + depends on ARM64 && FSL_DPAA + default y + help + DPAA FMan erratum A050385 software workaround implementation: + align buffers, data start, SG fragment length to avoid FMan DMA + splits. + FMAN DMA read or writes under heavy traffic load may cause FMAN + internal resource leak thus stopping further packet processing. + The FMAN internal queue can overflow when FMAN splits single + read or write transactions into multiple smaller transactions + such that more than 17 AXI transactions are in flight from FMAN + to interconnect. When the FMAN internal queue overflows, it can + stall further packet processing. The issue can occur with any + one of the following three conditions: + 1. FMAN AXI transaction crosses 4K address boundary (Errata + A010022) + 2. FMAN DMA address for an AXI transaction is not 16 byte + aligned, i.e. the last 4 bits of an address are non-zero + 3. Scatter Gather (SG) frames have more than one SG buffer in + the SG list and any one of the buffers, except the last + buffer in the SG list has data size that is not a multiple + of 16 bytes, i.e., other than 16, 32, 48, 64, etc. + With any one of the above three conditions present, there is + likelihood of stalled FMAN packet processing, especially under + stress with multiple ports injecting line-rate traffic. diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index 934111def0be..f151d6e111dd 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. 
+ * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -566,6 +567,10 @@ struct fman_cfg { u32 qmi_def_tnums_thresh; }; +#ifdef CONFIG_DPAA_ERRATUM_A050385 +static bool fman_has_err_a050385; +#endif + static irqreturn_t fman_exceptions(struct fman *fman, enum fman_exceptions exception) { @@ -2518,6 +2523,14 @@ struct fman *fman_bind(struct device *fm_dev) } EXPORT_SYMBOL(fman_bind); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void) +{ + return fman_has_err_a050385; +} +EXPORT_SYMBOL(fman_has_errata_a050385); +#endif + static irqreturn_t fman_err_irq(int irq, void *handle) { struct fman *fman = (struct fman *)handle; @@ -2845,6 +2858,11 @@ static struct fman *read_dts_node(struct platform_device *of_dev) goto fman_free; } +#ifdef CONFIG_DPAA_ERRATUM_A050385 + fman_has_err_a050385 = + of_property_read_bool(fm_node, "fsl,erratum-a050385"); +#endif + return fman; fman_node_put: diff --git a/drivers/net/ethernet/freescale/fman/fman.h b/drivers/net/ethernet/freescale/fman/fman.h index 935c317fa696..f2ede1360f03 100644 --- a/drivers/net/ethernet/freescale/fman/fman.h +++ b/drivers/net/ethernet/freescale/fman/fman.h @@ -1,5 +1,6 @@ /* * Copyright 2008-2015 Freescale Semiconductor Inc. + * Copyright 2020 NXP * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -398,6 +399,10 @@ u16 fman_get_max_frm(void); int fman_get_rx_extra_headroom(void); +#ifdef CONFIG_DPAA_ERRATUM_A050385 +bool fman_has_errata_a050385(void); +#endif + struct fman *fman_bind(struct device *dev); #endif /* __FM_H */ diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 72868a28b621..7d08bf6370ae 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2205,13 +2205,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. 
*/ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2225,7 +2229,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2235,7 +2239,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b3deb5e5ce29..d3161af9f8c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1675,7 +1675,7 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data) netif_dbg(h, drv, netdev, "setup tc: num_tc=%u\n", tc); return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? - kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; + kinfo->dcb_ops->setup_tc(h, tc ? tc : 1, prio_tc) : -EOPNOTSUPP; } static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 13dbd249f35f..b5e0ace648c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2450,10 +2450,12 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed, int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex) { + struct hclge_mac *mac = &hdev->hw.mac; int ret; duplex = hclge_check_speed_dup(duplex, speed); - if (hdev->hw.mac.speed == speed && hdev->hw.mac.duplex == duplex) + if (!mac->support_autoneg && mac->speed == speed && + mac->duplex == duplex) return 0; ret = hclge_cfg_mac_speed_dup_hw(hdev, speed, duplex); @@ -6106,6 +6108,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle, static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, struct hclge_fd_rule_tuples *tuples) { +#define flow_ip6_src fkeys->addrs.v6addrs.src.in6_u.u6_addr32 +#define flow_ip6_dst fkeys->addrs.v6addrs.dst.in6_u.u6_addr32 + tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto); tuples->ip_proto = fkeys->basic.ip_proto; tuples->dst_port = be16_to_cpu(fkeys->ports.dst); @@ -6114,12 +6119,12 @@ static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src); tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst); } else { - memcpy(tuples->src_ip, - fkeys->addrs.v6addrs.src.in6_u.u6_addr32, - sizeof(tuples->src_ip)); - memcpy(tuples->dst_ip, - fkeys->addrs.v6addrs.dst.in6_u.u6_addr32, - sizeof(tuples->dst_ip)); + int i; + + for (i = 0; i < IPV6_SIZE; i++) { + tuples->src_ip[i] = be32_to_cpu(flow_ip6_src[i]); + tuples->dst_ip[i] = be32_to_cpu(flow_ip6_dst[i]); + } } } @@ -7722,16 +7727,27 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type, struct hclge_desc desc; int ret; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false); - + /* read current vlan filter parameter */ + 
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, true); req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data; req->vlan_type = vlan_type; - req->vlan_fe = filter_en ? fe_type : 0; req->vf_id = vf_id; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get vlan filter config, ret = %d.\n", ret); + return ret; + } + + /* modify and write new config parameter */ + hclge_cmd_reuse_desc(&desc, false); + req->vlan_fe = filter_en ? + (req->vlan_fe | fe_type) : (req->vlan_fe & ~fe_type); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - dev_err(&hdev->pdev->dev, "set vlan filter fail, ret =%d.\n", + dev_err(&hdev->pdev->dev, "failed to set vlan filter, ret = %d.\n", ret); return ret; @@ -8470,6 +8486,28 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, } } +static void hclge_clear_vf_vlan(struct hclge_dev *hdev) +{ + struct hclge_vlan_info *vlan_info; + struct hclge_vport *vport; + int ret; + int vf; + + /* clear port base vlan for all vf */ + for (vf = HCLGE_VF_VPORT_START_NUM; vf < hdev->num_alloc_vport; vf++) { + vport = &hdev->vport[vf]; + vlan_info = &vport->port_base_vlan_cfg.vlan_info; + + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, + vlan_info->vlan_tag, true); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to clear vf vlan for vf%d, ret = %d\n", + vf - HCLGE_VF_VPORT_START_NUM, ret); + } +} + int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, u16 vlan_id, bool is_kill) { @@ -9821,6 +9859,13 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = init_mgr_tbl(hdev); + if (ret) { + dev_err(&pdev->dev, + "failed to reinit manager table, ret = %d\n", ret); + return ret; + } + ret = hclge_init_fd_config(hdev); if (ret) { dev_err(&pdev->dev, "fd table init fail, ret=%d\n", ret); @@ -9872,6 +9917,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) struct hclge_mac *mac = &hdev->hw.mac; hclge_reset_vf_rate(hdev); + hclge_clear_vf_vlan(hdev); hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 180224eab1ca..28db13253a5e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -566,7 +566,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) */ kinfo->num_tc = vport->vport_id ? 1 : min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); - vport->qs_offset = (vport->vport_id ? hdev->tm_info.num_tc : 0) + + vport->qs_offset = (vport->vport_id ? HNAE3_MAX_TC : 0) + (vport->vport_id ? 
(vport->vport_id - 1) : 0); max_rss_size = min_t(u16, hdev->rss_size_max, diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 6f2cf569a283..79b3d53f2fbf 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -297,6 +297,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index b069045de416..66fd2340d447 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -151,8 +151,8 @@ struct hinic_cmd_hw_ioctxt { u8 lro_en; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 517794509eb2..c7bb9ceca72c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -137,6 +137,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h index f4a339b10b10..79091e131418 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h @@ -94,6 +94,7 @@ struct hinic_rq { struct hinic_wq *wq; + struct cpumask affinity_mask; u32 irq; u16 msix_entry; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 2411ad270c98..42d00b049c6e 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -356,7 +356,8 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev) if (!num_cpus) num_cpus = num_online_cpus(); - nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus); + nic_dev->num_qps = hinic_hwdev_num_qps(hwdev); + nic_dev->num_qps = min_t(u16, nic_dev->num_qps, num_cpus); nic_dev->rss_limit = nic_dev->num_qps; nic_dev->num_rss = nic_dev->num_qps; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 56ea6d692f1c..2695ad69fca6 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -475,7 +475,6 @@ static int rx_request_irq(struct hinic_rxq *rxq) struct hinic_hwdev *hwdev = nic_dev->hwdev; struct hinic_rq *rq = rxq->rq; struct hinic_qp *qp; - struct cpumask mask; int err; rx_add_napi(rxq); @@ -492,8 +491,8 @@ static int rx_request_irq(struct hinic_rxq *rxq) } qp = container_of(rq, struct hinic_qp, rq); - cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask); - return irq_set_affinity_hint(rq->irq, &mask); + cpumask_set_cpu(qp->q_id % num_online_cpus(), &rq->affinity_mask); + return irq_set_affinity_hint(rq->irq, &rq->affinity_mask); } static void rx_free_irq(struct hinic_rxq *rxq) diff --git 
a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 69523ac85639..56b9e445732b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2362,7 +2362,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } - if (i40e_vc_validate_vqs_bitmaps(vqs)) { + if (!i40e_vc_validate_vqs_bitmaps(vqs)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2424,7 +2424,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } - if (i40e_vc_validate_vqs_bitmaps(vqs)) { + if (!i40e_vc_validate_vqs_bitmaps(vqs)) { aq_ret = I40E_ERR_PARAM; goto error_param; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index f73cd917c44f..3156de786d95 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -791,7 +791,7 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) struct i40e_ring *ring; if (test_bit(__I40E_CONFIG_BUSY, pf->state)) - return -ENETDOWN; + return -EAGAIN; if (test_bit(__I40E_VSI_DOWN, vsi->state)) return -ENETDOWN; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 77d6a0291e97..6939c14858b2 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -320,7 +320,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) if (err) return err; - dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", + dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n", ring->q_index); } else { if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) @@ -399,7 +399,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) /* Absolute queue number out of 2K needs to be passed */ err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q); if (err) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n", pf_q, err); return -EIO; @@ -422,7 +422,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) : ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring)); if (err) - dev_info(&vsi->back->pdev->dev, + dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n", ring->xsk_umem ? "UMEM enabled " : "", ring->q_index, pf_q); @@ -817,13 +817,13 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, * queues at the hardware level anyway. */ if (status == ICE_ERR_RESET_ONGOING) { - dev_dbg(&vsi->back->pdev->dev, + dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. 
LAN Tx queues already disabled\n"); } else if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(&vsi->back->pdev->dev, + dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n"); } else if (status) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n", status); return -ENODEV; } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index fb1d930470c7..cb437a448305 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -937,7 +937,7 @@ void ice_deinit_hw(struct ice_hw *hw) */ enum ice_status ice_check_reset(struct ice_hw *hw) { - u32 cnt, reg = 0, grst_delay; + u32 cnt, reg = 0, grst_delay, uld_mask; /* Poll for Device Active state in case a recent CORER, GLOBR, * or EMPR has occurred. The grst delay value is in 100ms units. @@ -959,13 +959,20 @@ enum ice_status ice_check_reset(struct ice_hw *hw) return ICE_ERR_RESET_FAILED; } -#define ICE_RESET_DONE_MASK (GLNVM_ULD_CORER_DONE_M | \ - GLNVM_ULD_GLOBR_DONE_M) +#define ICE_RESET_DONE_MASK (GLNVM_ULD_PCIER_DONE_M |\ + GLNVM_ULD_PCIER_DONE_1_M |\ + GLNVM_ULD_CORER_DONE_M |\ + GLNVM_ULD_GLOBR_DONE_M |\ + GLNVM_ULD_POR_DONE_M |\ + GLNVM_ULD_POR_DONE_1_M |\ + GLNVM_ULD_PCIER_DONE_2_M) + + uld_mask = ICE_RESET_DONE_MASK; /* Device is Active; check Global Reset processes are done */ for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) { - reg = rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK; - if (reg == ICE_RESET_DONE_MASK) { + reg = rd32(hw, GLNVM_ULD) & uld_mask; + if (reg == uld_mask) { ice_debug(hw, ICE_DBG_INIT, "Global reset processes done. %d\n", cnt); break; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index d870c1aedc17..265cf69b321b 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -713,13 +713,13 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) return -EINVAL; mutex_lock(&pf->tc_mutex); - ret = dcb_ieee_delapp(netdev, app); - if (ret) - goto delapp_out; - old_cfg = &pf->hw.port_info->local_dcbx_cfg; - if (old_cfg->numapps == 1) + if (old_cfg->numapps <= 1) + goto delapp_out; + + ret = dcb_ieee_delapp(netdev, app); + if (ret) goto delapp_out; new_cfg = &pf->hw.port_info->desired_dcbx_cfg; @@ -882,7 +882,7 @@ ice_dcbnl_vsi_del_app(struct ice_vsi *vsi, sapp.protocol = app->prot_id; sapp.priority = app->priority; err = ice_dcbnl_delapp(vsi->netdev, &sapp); - dev_dbg(&vsi->back->pdev->dev, + dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n", vsi->idx, err, app->selector, app->prot_id, app->priority); } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 9ebd93e79aeb..594f6dbb2110 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -165,13 +165,24 @@ static void ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct ice_netdev_priv *np = netdev_priv(netdev); + u8 oem_ver, oem_patch, nvm_ver_hi, nvm_ver_lo; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u16 oem_build; strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); strlcpy(drvinfo->version, ice_drv_ver, sizeof(drvinfo->version)); - strlcpy(drvinfo->fw_version, ice_nvm_version_str(&pf->hw), - 
sizeof(drvinfo->fw_version)); + + /* Display NVM version (from which the firmware version can be + * determined) which contains more pertinent information. + */ + ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, + &nvm_ver_hi, &nvm_ver_lo); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%x.%02x 0x%x %d.%d.%d", nvm_ver_hi, nvm_ver_lo, + hw->nvm.eetrack, oem_ver, oem_build, oem_patch); + strlcpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; @@ -1043,7 +1054,7 @@ ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) fec = ICE_FEC_NONE; break; default: - dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n", + dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n", fecparam->fec); return -EINVAL; } @@ -2966,13 +2977,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) else return -EINVAL; - /* Tell the OS link is going down, the link will go back up when fw - * says it is ready asynchronously - */ - ice_print_link_msg(vsi, false); - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); - /* Set the FC mode and only restart AN if link is up */ status = ice_set_fc(pi, &aq_failures, link_up); diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index e8f32350fed2..6f4a70fa3903 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -276,8 +276,14 @@ #define GLNVM_GENS_SR_SIZE_S 5 #define GLNVM_GENS_SR_SIZE_M ICE_M(0x7, 5) #define GLNVM_ULD 0x000B6008 +#define GLNVM_ULD_PCIER_DONE_M BIT(0) +#define GLNVM_ULD_PCIER_DONE_1_M BIT(1) #define GLNVM_ULD_CORER_DONE_M BIT(3) #define GLNVM_ULD_GLOBR_DONE_M BIT(4) +#define GLNVM_ULD_POR_DONE_M BIT(5) +#define GLNVM_ULD_POR_DONE_1_M BIT(8) +#define GLNVM_ULD_PCIER_DONE_2_M BIT(9) +#define GLNVM_ULD_PE_DONE_M BIT(10) #define GLPCI_CNF2 0x000BE004 #define GLPCI_CNF2_CACHELINE_SIZE_M BIT(1) #define PF_FUNC_RID 0x0009E880 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index e7449248fab4..b43bb51f6067 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -116,7 +116,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC; break; default: - dev_dbg(&vsi->back->pdev->dev, + dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n", vsi->type); break; @@ -697,7 +697,7 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) vsi->num_txq = tx_count; if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { - dev_dbg(&vsi->back->pdev->dev, "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); + dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); /* since there is a chance that num_rxq could have been changed * in the above for loop, make num_txq equal to num_rxq. 
*/ @@ -1306,7 +1306,7 @@ int ice_vsi_cfg_rxqs(struct ice_vsi *vsi) err = ice_setup_rx_ctx(vsi->rx_rings[i]); if (err) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n", i, err); return err; @@ -1476,7 +1476,7 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(&vsi->back->pdev->dev, "update VSI for VLAN insert failed, err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -1522,7 +1522,7 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(&vsi->back->pdev->dev, "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", ena, status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -1696,7 +1696,7 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) struct ice_q_vector *q_vector = vsi->q_vectors[i]; if (!q_vector) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n", i, vsi->vsi_num); goto clear_reg_idx; @@ -2647,25 +2647,6 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) } #endif /* CONFIG_DCB */ -/** - * ice_nvm_version_str - format the NVM version strings - * @hw: ptr to the hardware info - */ -char *ice_nvm_version_str(struct ice_hw *hw) -{ - u8 oem_ver, oem_patch, ver_hi, ver_lo; - static char buf[ICE_NVM_VER_LEN]; - u16 oem_build; - - ice_get_nvm_version(hw, &oem_ver, &oem_build, &oem_patch, &ver_hi, - &ver_lo); - - snprintf(buf, sizeof(buf), "%x.%02x 0x%x %d.%d.%d", ver_hi, ver_lo, - hw->nvm.eetrack, oem_ver, oem_build, oem_patch); - - return buf; -} - /** * ice_update_ring_stats - Update ring statistics * @ring: ring to update @@ -2737,6 +2718,6 @@ ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set) status = ice_remove_mac(&vsi->back->hw, &tmp_add_list); cfg_mac_fltr_exit: - ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list); + ice_free_fltr_list(ice_pf_to_dev(vsi->back), &tmp_add_list); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 6e31e30aba39..0d2b1119c0e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -97,8 +97,6 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran); -char *ice_nvm_version_str(struct ice_hw *hw); - enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 69bff085acf7..7f71f06fa819 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -269,7 +269,7 @@ static int ice_cfg_promisc(struct ice_vsi *vsi, u8 promisc_m, bool set_promisc) */ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; + struct device *dev = ice_pf_to_dev(vsi->back); struct net_device *netdev = vsi->netdev; bool promisc_forced_on = false; struct ice_pf *pf = vsi->back; @@ -1235,7 +1235,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >> GL_MDET_TX_TCLAN_QNUM_S); - if (netif_msg_rx_err(pf)) 
+ if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); @@ -1364,7 +1364,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) if (vsi->type != ICE_VSI_PF) return 0; - dev = &vsi->back->pdev->dev; + dev = ice_pf_to_dev(vsi->back); pi = vsi->port_info; @@ -1682,7 +1682,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) */ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) { - struct device *dev = &vsi->back->pdev->dev; + struct device *dev = ice_pf_to_dev(vsi->back); int i; for (i = 0; i < vsi->num_xdp_txq; i++) { @@ -3241,11 +3241,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_exit_unroll; } - dev_info(dev, "firmware %d.%d.%d api %d.%d.%d nvm %s build 0x%08x\n", - hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch, - hw->api_maj_ver, hw->api_min_ver, hw->api_patch, - ice_nvm_version_str(hw), hw->fw_build); - ice_request_fw(pf); /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be @@ -3863,14 +3858,14 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) /* Don't set any netdev advanced features with device in Safe Mode */ if (ice_is_safe_mode(vsi->back)) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Device is in Safe Mode - not enabling advanced netdev features\n"); return ret; } /* Do not change setting during reset */ if (ice_is_reset_in_progress(pf->state)) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Device is resetting, changing advanced netdev features temporarily unavailable.\n"); return -EBUSY; } @@ -4413,7 +4408,7 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) int i, err = 0; if (!vsi->num_txq) { - dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Tx queues\n", + dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n", vsi->vsi_num); return -EINVAL; } @@ -4444,7 +4439,7 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) int i, err = 0; if (!vsi->num_rxq) { - dev_err(&vsi->back->pdev->dev, "VSI %d has 0 Rx queues\n", + dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n", vsi->vsi_num); return -EINVAL; } @@ -4973,7 +4968,7 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(&vsi->back->pdev->dev, "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", + dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", bmode, status, hw->adminq.sq_last_status); ret = -EIO; goto out; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2c212f64d99f..f4015a5fb5c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1071,13 +1071,14 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) ice_put_rx_buf(rx_ring, rx_buf); continue; construct_skb: - if (skb) + if (skb) { ice_add_rx_frag(rx_ring, rx_buf, skb, size); - else if (ice_ring_uses_build_skb(rx_ring)) - skb = ice_build_skb(rx_ring, rx_buf, &xdp); - else - skb = ice_construct_skb(rx_ring, rx_buf, &xdp); - + } else if (likely(xdp.data)) { + if (ice_ring_uses_build_skb(rx_ring)) + skb = ice_build_skb(rx_ring, rx_buf, &xdp); + else + skb = ice_construct_skb(rx_ring, rx_buf, &xdp); + } /* exit if we failed to retrieve a buffer */ if (!skb) { 
rx_ring->rx_stats.alloc_buf_failed++; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 35bbc4ff603c..6da048a6ca7c 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -10,7 +10,7 @@ */ void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) { - u16 prev_ntu = rx_ring->next_to_use; + u16 prev_ntu = rx_ring->next_to_use & ~0x7; rx_ring->next_to_use = val; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index edb374296d1f..e2114f24a19e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -508,7 +508,7 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 vid, bool enable) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_info(&vsi->back->pdev->dev, "update VSI for port VLAN failed, err %d aq_err %d\n", + dev_info(ice_pf_to_dev(vsi->back), "update VSI for port VLAN failed, err %d aq_err %d\n", status, hw->adminq.sq_last_status); ret = -EIO; goto out; @@ -2019,7 +2019,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) continue; if (ice_vsi_ctrl_rx_ring(vsi, true, vf_q_id)) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", vf_q_id, vsi->vsi_num); v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2122,7 +2122,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) if (ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta)) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", vf_q_id, vsi->vsi_num); v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -2149,7 +2149,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) continue; if (ice_vsi_ctrl_rx_ring(vsi, false, vf_q_id)) { - dev_err(&vsi->back->pdev->dev, + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", vf_q_id, vsi->vsi_num); v_ret = VIRTCHNL_STATUS_ERR_PARAM; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 67ad8b8b127d..e540f78e38a3 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -324,8 +324,7 @@ ETH_HLEN + ETH_FCS_LEN, \ cache_line_size()) -#define MVNETA_SKB_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ - NET_IP_ALIGN) +#define MVNETA_SKB_HEADROOM max(XDP_PACKET_HEADROOM, NET_SKB_PAD) #define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \ MVNETA_SKB_HEADROOM)) #define MVNETA_SKB_SIZE(len) (SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD) @@ -402,6 +401,8 @@ struct mvneta_pcpu_stats { struct u64_stats_sync syncp; u64 rx_packets; u64 rx_bytes; + u64 rx_dropped; + u64 rx_errors; u64 tx_packets; u64 tx_bytes; }; @@ -739,6 +740,8 @@ mvneta_get_stats64(struct net_device *dev, struct mvneta_pcpu_stats *cpu_stats; u64 rx_packets; u64 rx_bytes; + u64 rx_dropped; + u64 rx_errors; u64 tx_packets; u64 tx_bytes; @@ -747,19 +750,20 @@ mvneta_get_stats64(struct net_device *dev, start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); rx_packets = cpu_stats->rx_packets; rx_bytes = cpu_stats->rx_bytes; + rx_dropped = cpu_stats->rx_dropped; + rx_errors = cpu_stats->rx_errors; tx_packets = cpu_stats->tx_packets; tx_bytes = cpu_stats->tx_bytes; } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); stats->rx_packets += rx_packets; stats->rx_bytes += rx_bytes; + stats->rx_dropped += rx_dropped; + 
stats->rx_errors += rx_errors; stats->tx_packets += tx_packets; stats->tx_bytes += tx_bytes; } - stats->rx_errors = dev->stats.rx_errors; - stats->rx_dropped = dev->stats.rx_dropped; - stats->tx_dropped = dev->stats.tx_dropped; } @@ -1167,6 +1171,7 @@ static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu) mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); pp->bm_priv = NULL; + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1); netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n"); } @@ -1736,8 +1741,14 @@ static u32 mvneta_txq_desc_csum(int l3_offs, int l3_proto, static void mvneta_rx_error(struct mvneta_port *pp, struct mvneta_rx_desc *rx_desc) { + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); u32 status = rx_desc->status; + /* update per-cpu counter */ + u64_stats_update_begin(&stats->syncp); + stats->rx_errors++; + u64_stats_update_end(&stats->syncp); + switch (status & MVNETA_RXD_ERR_CODE_MASK) { case MVNETA_RXD_ERR_CRC: netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n", @@ -2179,11 +2190,15 @@ mvneta_swbm_rx_frame(struct mvneta_port *pp, rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE); if (unlikely(!rxq->skb)) { - netdev_err(dev, - "Can't allocate skb on queue %d\n", - rxq->id); - dev->stats.rx_dropped++; + struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); + + netdev_err(dev, "Can't allocate skb on queue %d\n", rxq->id); rxq->skb_alloc_err++; + + u64_stats_update_begin(&stats->syncp); + stats->rx_dropped++; + u64_stats_update_end(&stats->syncp); + return -ENOMEM; } page_pool_release_page(rxq->page_pool, page); @@ -2270,7 +2285,6 @@ static int mvneta_rx_swbm(struct napi_struct *napi, /* Check errors only for FIRST descriptor */ if (rx_status & MVNETA_RXD_ERR_SUMMARY) { mvneta_rx_error(pp, rx_desc); - dev->stats.rx_errors++; /* leave the descriptor untouched */ continue; } @@ -2372,7 +2386,6 @@ static int mvneta_rx_hwbm(struct napi_struct *napi, mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool, rx_desc->buf_phys_addr); err_drop_frame: - dev->stats.rx_errors++; mvneta_rx_error(pp, rx_desc); /* leave the descriptor untouched */ continue; @@ -4226,6 +4239,12 @@ static int mvneta_xdp_setup(struct net_device *dev, struct bpf_prog *prog, return -EOPNOTSUPP; } + if (pp->bm_priv) { + NL_SET_ERR_MSG_MOD(extack, + "Hardware Buffer Management not supported on XDP"); + return -EOPNOTSUPP; + } + need_update = !!pp->xdp_prog != !!prog; if (running && need_update) mvneta_stop(dev); @@ -4942,7 +4961,6 @@ static int mvneta_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); pp->id = global_port_id++; - pp->rx_offset_correction = MVNETA_SKB_HEADROOM; /* Obtain access to BM resources if enabled and already initialized */ bm_node = of_parse_phandle(dn, "buffer-manager", 0); @@ -4967,6 +4985,10 @@ static int mvneta_probe(struct platform_device *pdev) } of_node_put(bm_node); + /* sw buffer management */ + if (!pp->bm_priv) + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + err = mvneta_init(&pdev->dev, pp); if (err < 0) goto err_netdev; @@ -5124,6 +5146,7 @@ static int mvneta_resume(struct device *device) err = mvneta_bm_port_init(pdev, pp); if (err < 0) { dev_info(&pdev->dev, "use SW buffer management\n"); + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; pp->bm_priv = NULL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index d787bc0a4155..e09bc3858d57 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -45,7 +45,7 @@ void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { - if (!MLX5_CAP_GEN(mdev, tls)) + if (!MLX5_CAP_GEN(mdev, tls_tx)) return false; if (!MLX5_CAP_GEN(mdev, log_max_dek)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3a975641f902..20b907dc1e29 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -200,7 +200,7 @@ int mlx5e_health_report(struct mlx5e_priv *priv, netdev_err(priv->netdev, err_str); if (!reporter) - return err_ctx->recover(&err_ctx->ctx); + return err_ctx->recover(err_ctx->ctx); return devlink_health_report(reporter, err_str, err_ctx); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 7c8796d9743f..a226277b0980 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -179,6 +179,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } +static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) +{ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + mlx5_wq_ll_reset(&rq->mpwqe.wq); + else + mlx5_wq_cyc_reset(&rq->wqe.wq); +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 71384ad1a443..ef1ed15a53b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -269,7 +269,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, int datalen; u32 skb_seq; - if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) { skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4997b8a51994..5d9cfac67236 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -721,6 +721,9 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) if (!in) return -ENOMEM; + if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY) + mlx5e_rqwq_reset(rq); + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rq_state, curr_state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 9e9960146e5b..1c3ab69cbd96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -613,13 +613,6 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { - netdev_WARN_ONCE(cq->channel->netdev, - "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); - if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) - queue_work(cq->channel->priv->wq, &sq->recover_work); - break; - } do { struct mlx5e_sq_wqe_info *wi; u16 ci; @@ -629,6 +622,15 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.ico_wqe[ci]; + if 
(last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { + netdev_WARN_ONCE(cq->channel->netdev, + "Bad OP in ICOSQ CQE: 0x%x\n", + get_cqe_opcode(cqe)); + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) + queue_work(cq->channel->priv->wq, &sq->recover_work); + break; + } + if (likely(wi->opcode == MLX5_OPCODE_UMR)) { sqcc += MLX5E_UMR_WQEBBS; wi->umr.rq->mpwqe.umr_completed++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 9f09253f9f46..a05158472ed1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -297,6 +297,9 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; #endif s->tx_cqes += sq_stats->cqes; + + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */ + barrier(); } } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 2565ba8692d9..ee60383adc5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -451,34 +451,17 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) i = 0; do { + struct mlx5e_tx_wqe_info *wi; u16 wqe_counter; bool last_wqe; + u16 ci; mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); - if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { - if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, - &sq->state)) { - struct mlx5e_tx_wqe_info *wi; - u16 ci; - - ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); - wi = &sq->db.wqe_info[ci]; - mlx5e_dump_error_cqe(sq, - (struct mlx5_err_cqe *)cqe); - mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); - queue_work(cq->channel->priv->wq, - &sq->recover_work); - } - stats->cqe_err++; - } - do { - struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; - u16 ci; int j; last_wqe = (sqcc == wqe_counter); @@ -516,6 +499,18 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) napi_consume_skb(skb, napi_budget); } while (!last_wqe); + if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) { + if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, + &sq->state)) { + mlx5e_dump_error_cqe(sq, + (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); + queue_work(cq->channel->priv->wq, + &sq->recover_work); + } + stats->cqe_err++; + } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); stats->cqes += i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 3df3604e8929..07282c679dcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -456,12 +456,16 @@ static void esw_destroy_legacy_table(struct mlx5_eswitch *esw) static int esw_legacy_enable(struct mlx5_eswitch *esw) { - int ret; + struct mlx5_vport *vport; + int ret, i; ret = esw_create_legacy_table(esw); if (ret) return ret; + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + ret = mlx5_eswitch_enable_pf_vf_vports(esw, MLX5_LEGACY_SRIOV_VPORT_EVENTS); if (ret) esw_destroy_legacy_table(esw); @@ -2449,25 +2453,17 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) { - int err = 0; - if (!esw) return -EOPNOTSUPP; if (!ESW_ALLOWED(esw)) return -EPERM; - 
mutex_lock(&esw->state_lock); - if (esw->mode != MLX5_ESWITCH_LEGACY) { - err = -EOPNOTSUPP; - goto out; - } + if (esw->mode != MLX5_ESWITCH_LEGACY) + return -EOPNOTSUPP; *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0; - -out: - mutex_unlock(&esw->state_lock); - return err; + return 0; } int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3e6412783078..dfefc6250f23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1377,7 +1377,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return -EINVAL; } - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { @@ -2271,7 +2271,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, { int err, err1; - mlx5_eswitch_disable(esw, true); + mlx5_eswitch_disable(esw, false); err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index e4ec0e03c289..4c61d25d2e88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -850,6 +850,7 @@ void mlx5_fpga_ipsec_delete_sa_ctx(void *context) mutex_lock(&fpga_xfrm->lock); if (!--fpga_xfrm->num_rules) { mlx5_fpga_ipsec_release_sa_ctx(fpga_xfrm->sa_ctx); + kfree(fpga_xfrm->sa_ctx); fpga_xfrm->sa_ctx = NULL; } mutex_unlock(&fpga_xfrm->lock); @@ -1478,7 +1479,7 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, if (!memcmp(&xfrm->attrs, attrs, sizeof(xfrm->attrs))) return 0; - if (!mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { + if (mlx5_fpga_esp_validate_xfrm_attrs(mdev, attrs)) { mlx5_core_warn(mdev, "Tried to create an esp with unsupported attrs\n"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 8c5df6c7d7b6..8af87f922fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1555,16 +1555,16 @@ struct match_list_head { struct match_list first; }; -static void free_match_list(struct match_list_head *head) +static void free_match_list(struct match_list_head *head, bool ft_locked) { if (!list_empty(&head->list)) { struct match_list *iter, *match_tmp; list_del(&head->first.list); - tree_put_node(&head->first.g->node, false); + tree_put_node(&head->first.g->node, ft_locked); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { - tree_put_node(&iter->g->node, false); + tree_put_node(&iter->g->node, ft_locked); list_del(&iter->list); kfree(iter); } @@ -1573,7 +1573,8 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - const struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec, + bool ft_locked) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1598,7 +1599,7 @@ static int build_match_list(struct match_list_head *match_head, curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); if (!curr_match) { - free_match_list(match_head); + 
free_match_list(match_head, ft_locked); err = -ENOMEM; goto out; } @@ -1778,7 +1779,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, version = atomic_read(&ft->node.version); /* Collect all fgs which has a matching match_criteria */ - err = build_match_list(&match_head, ft, spec); + err = build_match_list(&match_head, ft, spec, take_write); if (err) { if (take_write) up_write_ref_node(&ft->node, false); @@ -1792,7 +1793,7 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest, dest_num, version); - free_match_list(&match_head); + free_match_list(&match_head, take_write); if (!IS_ERR(rule) || (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { if (take_write) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index a19790dee7b2..13e86f0b42f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -239,7 +239,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, tls)) { + if (MLX5_CAP_GEN(dev, tls_tx)) { err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index c6c7d1defbd7..aade62a9ee5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -2307,7 +2307,9 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_cmd_caps *caps; + u8 *bit_mask = sb->bit_mask; u8 *tag = hw_ste->tag; + bool source_gvmi_set; DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn); @@ -2328,7 +2330,8 @@ static int dr_ste_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, if (!vport_cap) return -EINVAL; - if (vport_cap->vport_gvmi) + source_gvmi_set = MLX5_GET(ste_src_gvmi_qp, bit_mask, source_gvmi); + if (vport_cap->vport_gvmi && source_gvmi_set) MLX5_SET(ste_src_gvmi_qp, tag, source_gvmi, vport_cap->vport_gvmi); misc->source_eswitch_owner_vhca_id = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index f2a0e72285ba..01f075fac276 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -89,11 +89,18 @@ void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides) len = nstrides << wq->fbc.log_stride; wqe = mlx5_wq_cyc_get_wqe(wq, ix); - pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %ld\n", + pr_info("WQE DUMP: WQ size %d WQ cur size %d, WQE index 0x%x, len: %zu\n", mlx5_wq_cyc_get_size(wq), wq->cur_sz, ix, len); print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, wqe, len, false); } +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq) +{ + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_cyc_update_db_record(wq); +} + int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -192,6 +199,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, return err; } +static void mlx5_wq_ll_init_list(struct mlx5_wq_ll *wq) +{ + struct mlx5_wqe_srq_next_seg *next_seg; + int i; + + for (i = 0; i < wq->fbc.sz_m1; i++) { + next_seg = mlx5_wq_ll_get_wqe(wq, i); + next_seg->next_wqe_index = cpu_to_be16(i 
+ 1); + } + next_seg = mlx5_wq_ll_get_wqe(wq, i); + wq->tail_next = &next_seg->next_wqe_index; +} + int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) @@ -199,9 +219,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; - struct mlx5_wqe_srq_next_seg *next_seg; int err; - int i; err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -220,13 +238,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); - for (i = 0; i < fbc->sz_m1; i++) { - next_seg = mlx5_wq_ll_get_wqe(wq, i); - next_seg->next_wqe_index = cpu_to_be16(i + 1); - } - next_seg = mlx5_wq_ll_get_wqe(wq, i); - wq->tail_next = &next_seg->next_wqe_index; - + mlx5_wq_ll_init_list(wq); wq_ctrl->mdev = mdev; return 0; @@ -237,6 +249,15 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, return err; } +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq) +{ + wq->head = 0; + wq->wqe_ctr = 0; + wq->cur_sz = 0; + mlx5_wq_ll_init_list(wq); + mlx5_wq_ll_update_db_record(wq); +} + void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl) { mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->buf); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index d9a94bc223c0..4cadc336593f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -80,6 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl); void mlx5_wq_cyc_wqe_dump(struct mlx5_wq_cyc *wq, u16 ix, u8 nstrides); +void mlx5_wq_cyc_reset(struct mlx5_wq_cyc *wq); int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, @@ -92,6 +93,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl); +void mlx5_wq_ll_reset(struct mlx5_wq_ll *wq); void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 9bf8da5f6daf..3fe878d7c94c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -573,6 +573,7 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) { + enum mlxsw_reg_mgpir_device_type device_type; int index, max_index, sensor_index; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; char mtmp_pl[MLXSW_REG_MTMP_LEN]; @@ -584,8 +585,9 @@ static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL, NULL); - if (!gbox_num) + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL); + if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || + !gbox_num) return 0; index = mlxsw_hwmon->module_sensor_max; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index c721b171bd8d..ce0a6837daa3 
100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -895,8 +895,10 @@ static int mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal) { + enum mlxsw_reg_mgpir_device_type device_type; struct mlxsw_thermal_module *gearbox_tz; char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + u8 gbox_num; int i; int err; @@ -908,11 +910,13 @@ mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, if (err) return err; - mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL, + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL, NULL); - if (!thermal->tz_gearbox_num) + if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE || + !gbox_num) return 0; + thermal->tz_gearbox_num = gbox_num; thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num, sizeof(*thermal->tz_gearbox_arr), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 2b543911ae00..c4caeeadcba9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -213,8 +213,8 @@ mlxsw_m_port_create(struct mlxsw_m *mlxsw_m, u8 local_port, u8 module) err_register_netdev: mlxsw_m->ports[local_port] = NULL; - free_netdev(dev); err_dev_addr_get: + free_netdev(dev); err_alloc_etherdev: mlxsw_core_port_fini(mlxsw_m->core, local_port); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index e0d7d2d9a0c8..43fa8c85b5d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -28,7 +28,7 @@ #define MLXSW_PCI_SW_RESET 0xF0010 #define MLXSW_PCI_SW_RESET_RST_BIT BIT(0) #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 -#define MLXSW_PCI_SW_RESET_WAIT_MSECS 100 +#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF #define MLXSW_PCI_FW_READY_MAGIC 0x5E diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 49933818c6f5..2dc0978428e6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -215,7 +215,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); if (err) - return err; + goto err_ctx_prepare; j = 0; for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -247,6 +247,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, return 0; err_entry_append: err_entry_get: +err_ctx_prepare: rtnl_unlock(); devlink_dpipe_entry_clear(&entry); return err; diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index a41a90c589db..58579baf3f7a 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -156,24 +156,6 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). 
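*/

/* Editor's note: the hunks below drop the 8-bit accessors and change how
 * ks_rdreg16()/ks_wrreg16() build the command word. A minimal standalone
 * sketch of the byte-enable arithmetic follows; the BE0..BE3 values are
 * taken from this driver (byte-enable bits 12..15 of the command word)
 * and should be treated as assumptions outside that context.
 */
#include <stdint.h>
#include <stdio.h>

#define BE3 0x8000
#define BE2 0x4000
#define BE1 0x2000
#define BE0 0x1000

/* Old encoding: select a byte-enable pair by shifting BE1|BE0 left. */
static uint16_t cmd_old(int offset)
{
	return (uint16_t)(offset | ((BE1 | BE0) << (offset & 0x02)));
}

/* New encoding: select the pair by shifting BE3|BE2 right. */
static uint16_t cmd_new(int offset)
{
	return (uint16_t)(offset | ((BE3 | BE2) >> (offset & 0x02)));
}

int main(void)
{
	/* For any given offset the two encodings pick opposite
	 * byte-enable pairs; that lane swap is the substance of the
	 * fix for the affected bus wiring. */
	printf("0x08: old 0x%04x new 0x%04x\n", cmd_old(0x08), cmd_new(0x08));
	/* prints: 0x08: old 0x3008 new 0xc008 */
	printf("0x0a: old 0x%04x new 0x%04x\n", cmd_old(0x0a), cmd_new(0x0a));
	/* prints: 0x0a: old 0xc00a new 0x300a */
	return 0;
}

/*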
*/ -/** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -184,27 +166,11 @@ static u8 ks_rdreg8(struct ks_net *ks, int offset) static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -215,7 +181,7 @@ static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } @@ -231,7 +197,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -245,7 +211,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) @@ -324,8 +290,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -429,7 +394,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -446,7 +411,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. 
reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -548,14 +513,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -581,6 +549,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -650,6 +619,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -679,13 +649,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ @@ -706,10 +676,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -723,9 +692,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 985b46d7e3d1..88c0464a54e2 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2175,24 +2175,29 @@ static int ocelot_init_timestamp(struct ocelot *ocelot) return 0; } -static void ocelot_port_set_mtu(struct ocelot *ocelot, int port, size_t mtu) +/* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu. + * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG. 
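*/

/* Editor's note: a standalone illustration of the SDU-to-maxlen
 * arithmetic described above. ETH_HLEN/ETH_FCS_LEN/ETH_DATA_LEN are the
 * standard Ethernet values; the OCELOT_* tag/prefix lengths are
 * stand-ins for the driver's constants and are assumptions here.
 */
#include <stdio.h>

#define ETH_HLEN     14   /* Ethernet header */
#define ETH_FCS_LEN   4   /* frame check sequence */
#define ETH_DATA_LEN 1500 /* default MTU, i.e. the L2 payload (SDU) */

#define OCELOT_TAG_LEN          16 /* injection/extraction frame header */
#define OCELOT_SHORT_PREFIX_LEN  4

static int ocelot_maxlen(int sdu)
{
	/* VLAN tag bytes are intentionally absent: the hardware adds
	 * them on top of DEV_MAC_MAXLEN_CFG via DEV_MAC_TAGS_CFG. */
	return sdu + ETH_HLEN + ETH_FCS_LEN;
}

int main(void)
{
	/* Front port: 1500 + 14 + 4 = 1518 */
	printf("front port maxlen: %d\n", ocelot_maxlen(ETH_DATA_LEN));

	/* NPI/CPU port: the CPU injection header and optional prefix
	 * ride inside the payload, so they extend the SDU first:
	 * (1500 + 16 + 4) + 14 + 4 = 1538 */
	printf("npi port maxlen:   %d\n",
	       ocelot_maxlen(ETH_DATA_LEN + OCELOT_TAG_LEN +
			     OCELOT_SHORT_PREFIX_LEN));
	return 0;
}

/*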
+ */ +static void ocelot_port_set_maxlen(struct ocelot *ocelot, int port, size_t sdu) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + int maxlen = sdu + ETH_HLEN + ETH_FCS_LEN; int atop_wm; - ocelot_port_writel(ocelot_port, mtu, DEV_MAC_MAXLEN_CFG); + ocelot_port_writel(ocelot_port, maxlen, DEV_MAC_MAXLEN_CFG); /* Set Pause WM hysteresis - * 152 = 6 * mtu / OCELOT_BUFFER_CELL_SZ - * 101 = 4 * mtu / OCELOT_BUFFER_CELL_SZ + * 152 = 6 * maxlen / OCELOT_BUFFER_CELL_SZ + * 101 = 4 * maxlen / OCELOT_BUFFER_CELL_SZ */ ocelot_write_rix(ocelot, SYS_PAUSE_CFG_PAUSE_ENA | SYS_PAUSE_CFG_PAUSE_STOP(101) | SYS_PAUSE_CFG_PAUSE_START(152), SYS_PAUSE_CFG, port); /* Tail dropping watermark */ - atop_wm = (ocelot->shared_queue_sz - 9 * mtu) / OCELOT_BUFFER_CELL_SZ; - ocelot_write_rix(ocelot, ocelot_wm_enc(9 * mtu), + atop_wm = (ocelot->shared_queue_sz - 9 * maxlen) / + OCELOT_BUFFER_CELL_SZ; + ocelot_write_rix(ocelot, ocelot_wm_enc(9 * maxlen), SYS_ATOP, port); ocelot_write(ocelot, ocelot_wm_enc(atop_wm), SYS_ATOP_TOT_CFG); } @@ -2221,9 +2226,10 @@ void ocelot_init_port(struct ocelot *ocelot, int port) DEV_MAC_HDX_CFG); /* Set Max Length and maximum tags allowed */ - ocelot_port_set_mtu(ocelot, port, VLAN_ETH_FRAME_LEN); + ocelot_port_set_maxlen(ocelot, port, ETH_DATA_LEN); ocelot_port_writel(ocelot_port, DEV_MAC_TAGS_CFG_TAG_ID(ETH_P_8021AD) | DEV_MAC_TAGS_CFG_VLAN_AWR_ENA | + DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA | DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA, DEV_MAC_TAGS_CFG); @@ -2309,18 +2315,18 @@ void ocelot_set_cpu_port(struct ocelot *ocelot, int cpu, * Only one port can be an NPI at the same time. */ if (cpu < ocelot->num_phys_ports) { - int mtu = VLAN_ETH_FRAME_LEN + OCELOT_TAG_LEN; + int sdu = ETH_DATA_LEN + OCELOT_TAG_LEN; ocelot_write(ocelot, QSYS_EXT_CPU_CFG_EXT_CPUQ_MSK_M | QSYS_EXT_CPU_CFG_EXT_CPU_PORT(cpu), QSYS_EXT_CPU_CFG); if (injection == OCELOT_TAG_PREFIX_SHORT) - mtu += OCELOT_SHORT_PREFIX_LEN; + sdu += OCELOT_SHORT_PREFIX_LEN; else if (injection == OCELOT_TAG_PREFIX_LONG) - mtu += OCELOT_LONG_PREFIX_LEN; + sdu += OCELOT_LONG_PREFIX_LEN; - ocelot_port_set_mtu(ocelot, cpu, mtu); + ocelot_port_set_maxlen(ocelot, cpu, sdu); } /* CPU port Injection/Extraction configuration */ diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index 2da8eee27e98..ecbd4be145b8 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ b/drivers/net/ethernet/mscc/ocelot_board.c @@ -114,6 +114,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) if (err != 4) break; + /* At this point the IFH was read correctly, so it is safe to + * presume that there is no error. The err needs to be reset + * otherwise a frame could come in CPU queue between the while + * condition and the check for error later on. And in that case + * the new frame is just removed and not processed. 
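*/

/* Editor's note: a compressed sketch of the extraction-loop shape this
 * comment describes, using invented helpers (frames_pending(),
 * read_ifh_word(), process_frame()) in place of the real MMIO
 * accessors; it only demonstrates why err must be cleared once the IFH
 * has been read in full.
 */
#include <stdbool.h>

static bool frames_pending(void) { return false; } /* stub */
static int read_ifh_word(unsigned int *w) { *w = 0; return 4; } /* stub */
static void process_frame(void) { } /* stub */

static void xtr_irq_body(void)
{
	int err = 0;

	while (frames_pending()) {
		unsigned int ifh[4];
		int i;

		for (i = 0; i < 4; i++) {
			err = read_ifh_word(&ifh[i]); /* 4 = one word read */
			if (err != 4)
				break;
		}
		if (err != 4)
			break;

		/* The IFH was read in full, so err still holds a stale,
		 * truthy "4". Clear it: a frame arriving between the
		 * while() condition and the "if (err)" below would
		 * otherwise terminate the loop and be left unprocessed. */
		err = 0;

		process_frame();
		/* ... the payload copy may legitimately set err ... */
		if (err)
			break;
	}
}

int main(void)
{
	xtr_irq_body();
	return 0;
}

/*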
+ */ + err = 0; + ocelot_parse_ifh(ifh, &info); ocelot_port = ocelot->ports[info.port]; diff --git a/drivers/net/ethernet/mscc/ocelot_dev.h b/drivers/net/ethernet/mscc/ocelot_dev.h index 0a50d53bbd3f..7c08437061fc 100644 --- a/drivers/net/ethernet/mscc/ocelot_dev.h +++ b/drivers/net/ethernet/mscc/ocelot_dev.h @@ -74,7 +74,7 @@ #define DEV_MAC_TAGS_CFG_TAG_ID_M GENMASK(31, 16) #define DEV_MAC_TAGS_CFG_TAG_ID_X(x) (((x) & GENMASK(31, 16)) >> 16) #define DEV_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(2) -#define DEV_MAC_TAGS_CFG_PB_ENA BIT(1) +#define DEV_MAC_TAGS_CFG_VLAN_DBL_AWR_ENA BIT(1) #define DEV_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) #define DEV_MAC_ADV_CHK_CFG 0x2c diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index 9f8a1f69c0c4..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -176,10 +176,8 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, u8 mask, val; int err; - if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) { - err = -EOPNOTSUPP; + if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) goto err_delete; - } tos_off = proto == htons(ETH_P_IP) ? 16 : 20; @@ -200,18 +198,14 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, if ((iter->val & cmask) == (val & cmask) && iter->band != knode->res->classid) { NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter"); - err = -EOPNOTSUPP; goto err_delete; } } if (!match) { match = kzalloc(sizeof(*match), GFP_KERNEL); - if (!match) { - err = -ENOMEM; - goto err_delete; - } - + if (!match) + return -ENOMEM; list_add(&match->list, &alink->dscp_map); } match->handle = knode->handle; @@ -227,7 +221,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink, err_delete: nfp_abm_u32_knode_delete(alink, knode); - return err; + return -EOPNOTSUPP; } static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c index 5f9d2ec70446..61c06fbe10db 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c @@ -103,7 +103,7 @@ int ionic_heartbeat_check(struct ionic *ionic) { struct ionic_dev *idev = &ionic->idev; unsigned long hb_time; - u32 fw_status; + u8 fw_status; u32 hb; /* wait a little more than one second before testing again */ @@ -111,9 +111,12 @@ int ionic_heartbeat_check(struct ionic *ionic) if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period))) return 0; - /* firmware is useful only if fw_status is non-zero */ - fw_status = ioread32(&idev->dev_info_regs->fw_status); - if (!fw_status) + /* firmware is useful only if the running bit is set and + * fw_status != 0xff (bad PCI read) + */ + fw_status = ioread8(&idev->dev_info_regs->fw_status); + if (fw_status == 0xff || + !(fw_status & IONIC_FW_STS_F_RUNNING)) return -ENXIO; /* early FW has no heartbeat, else FW will return non-zero */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 39317cdfa6cf..d5e8b4e2a96e 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -862,7 +862,7 @@ struct ionic_rxq_comp { #define IONIC_RXQ_COMP_CSUM_F_VLAN 0x40 #define IONIC_RXQ_COMP_CSUM_F_CALC 0x80 u8 pkt_type_color; -#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x0f +#define IONIC_RXQ_COMP_PKT_TYPE_MASK 0x7f }; enum ionic_pkt_type { @@ -2348,6 
+2348,7 @@ union ionic_dev_info_regs { u8 version; u8 asic_type; u8 asic_rev; +#define IONIC_FW_STS_F_RUNNING 0x1 u8 fw_status; u32 fw_heartbeat; char fw_version[IONIC_DEVINFO_FWVERS_BUFLEN]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c index 0dacf2c18c09..3e613058e225 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c @@ -44,8 +44,8 @@ /* Add/subtract the Adjustment_Value when making a Drift adjustment */ #define QED_DRIFT_CNTR_DIRECTION_SHIFT 31 #define QED_TIMESTAMP_MASK BIT(16) -/* Param mask for Hardware to detect/timestamp the unicast PTP packets */ -#define QED_PTP_UCAST_PARAM_MASK 0xF +/* Param mask for Hardware to detect/timestamp the L2/L4 unicast PTP packets */ +#define QED_PTP_UCAST_PARAM_MASK 0x70F static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn) { diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index e8a1b27db84d..234c6f30effb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -163,6 +163,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; bool exp_recovery; }; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index ffabc2d2f082..2d873ae8a234 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -59,6 +59,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -83,8 +86,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -310,15 +328,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 06de59521fc4..fbf4cbcf1a65 100644 --- 
a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -13,25 +13,6 @@ #include "rmnet_vnd.h" #include "rmnet_private.h" -/* Locking scheme - - * The shared resource which needs to be protected is realdev->rx_handler_data. - * For the writer path, this is using rtnl_lock(). The writer paths are - * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These - * paths are already called with rtnl_lock() acquired in. There is also an - * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For - * dereference here, we will need to use rtnl_dereference(). Dev list writing - * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link(). - * For the reader path, the real_dev->rx_handler_data is called in the TX / RX - * path. We only need rcu_read_lock() for these scenarios. In these cases, - * the rcu_read_lock() is held in __dev_queue_xmit() and - * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl() - * to get the relevant information. For dev list reading, we again acquire - * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu(). - * We also use unregister_netdevice_many() to free all rmnet devices in - * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in - * same context. - */ - /* Local Definitions and Declarations */ static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = { @@ -51,9 +32,10 @@ rmnet_get_port_rtnl(const struct net_device *real_dev) return rtnl_dereference(real_dev->rx_handler_data); } -static int rmnet_unregister_real_device(struct net_device *real_dev, - struct rmnet_port *port) +static int rmnet_unregister_real_device(struct net_device *real_dev) { + struct rmnet_port *port = rmnet_get_port_rtnl(real_dev); + if (port->nr_rmnet_devs) return -EINVAL; @@ -61,9 +43,6 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, kfree(port); - /* release reference on real_dev */ - dev_put(real_dev); - netdev_dbg(real_dev, "Removed from rmnet\n"); return 0; } @@ -89,9 +68,6 @@ static int rmnet_register_real_device(struct net_device *real_dev) return -EBUSY; } - /* hold on to real dev for MAP data */ - dev_hold(real_dev); - for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++) INIT_HLIST_HEAD(&port->muxed_ep[entry]); @@ -99,28 +75,33 @@ static int rmnet_register_real_device(struct net_device *real_dev) return 0; } -static void rmnet_unregister_bridge(struct net_device *dev, - struct rmnet_port *port) +static void rmnet_unregister_bridge(struct rmnet_port *port) { - struct rmnet_port *bridge_port; - struct net_device *bridge_dev; + struct net_device *bridge_dev, *real_dev, *rmnet_dev; + struct rmnet_port *real_port; if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) return; - /* bridge slave handling */ + rmnet_dev = port->rmnet_dev; if (!port->nr_rmnet_devs) { - bridge_dev = port->bridge_ep; + /* bridge device */ + real_dev = port->bridge_ep; + bridge_dev = port->dev; - bridge_port = rmnet_get_port_rtnl(bridge_dev); - bridge_port->bridge_ep = NULL; - bridge_port->rmnet_mode = RMNET_EPMODE_VND; + real_port = rmnet_get_port_rtnl(real_dev); + real_port->bridge_ep = NULL; + real_port->rmnet_mode = RMNET_EPMODE_VND; } else { + /* real device */ bridge_dev = port->bridge_ep; - bridge_port = rmnet_get_port_rtnl(bridge_dev); - rmnet_unregister_real_device(bridge_dev, bridge_port); + port->bridge_ep = NULL; + port->rmnet_mode = RMNET_EPMODE_VND; } + + netdev_upper_dev_unlink(bridge_dev, rmnet_dev); + 
rmnet_unregister_real_device(bridge_dev); } static int rmnet_newlink(struct net *src_net, struct net_device *dev, @@ -135,6 +116,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; @@ -157,7 +143,12 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1; + err = netdev_upper_dev_link(real_dev, dev, extack); + if (err < 0) + goto err2; + port->rmnet_mode = mode; + port->rmnet_dev = dev; hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -173,8 +164,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, return 0; +err2: + unregister_netdevice(dev); + rmnet_vnd_dellink(mux_id, port, ep); err1: - rmnet_unregister_real_device(real_dev, port); + rmnet_unregister_real_device(real_dev); err0: kfree(ep); return err; @@ -183,77 +177,74 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, static void rmnet_dellink(struct net_device *dev, struct list_head *head) { struct rmnet_priv *priv = netdev_priv(dev); - struct net_device *real_dev; + struct net_device *real_dev, *bridge_dev; + struct rmnet_port *real_port, *bridge_port; struct rmnet_endpoint *ep; - struct rmnet_port *port; - u8 mux_id; + u8 mux_id = priv->mux_id; real_dev = priv->real_dev; - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + if (!rmnet_is_real_dev_registered(real_dev)) return; - port = rmnet_get_port_rtnl(real_dev); - - mux_id = rmnet_vnd_get_mux(dev); + real_port = rmnet_get_port_rtnl(real_dev); + bridge_dev = real_port->bridge_ep; + if (bridge_dev) { + bridge_port = rmnet_get_port_rtnl(bridge_dev); + rmnet_unregister_bridge(bridge_port); + } - ep = rmnet_get_endpoint(port, mux_id); + ep = rmnet_get_endpoint(real_port, mux_id); if (ep) { hlist_del_init_rcu(&ep->hlnode); - rmnet_unregister_bridge(dev, port); - rmnet_vnd_dellink(mux_id, port, ep); + rmnet_vnd_dellink(mux_id, real_port, ep); kfree(ep); } - rmnet_unregister_real_device(real_dev, port); + netdev_upper_dev_unlink(real_dev, dev); + rmnet_unregister_real_device(real_dev); unregister_netdevice_queue(dev, head); } -static void rmnet_force_unassociate_device(struct net_device *dev) +static void rmnet_force_unassociate_device(struct net_device *real_dev) { - struct net_device *real_dev = dev; struct hlist_node *tmp_ep; struct rmnet_endpoint *ep; struct rmnet_port *port; unsigned long bkt_ep; LIST_HEAD(list); - if (!rmnet_is_real_dev_registered(real_dev)) - return; - - ASSERT_RTNL(); - - port = rmnet_get_port_rtnl(dev); - - rcu_read_lock(); - rmnet_unregister_bridge(dev, port); - - hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { - unregister_netdevice_queue(ep->egress_dev, &list); - rmnet_vnd_dellink(ep->mux_id, port, ep); + port = rmnet_get_port_rtnl(real_dev); - hlist_del_init_rcu(&ep->hlnode); - kfree(ep); + if (port->nr_rmnet_devs) { + /* real device */ + rmnet_unregister_bridge(port); + hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + unregister_netdevice_queue(ep->egress_dev, &list); + netdev_upper_dev_unlink(real_dev, ep->egress_dev); + rmnet_vnd_dellink(ep->mux_id, port, ep); + hlist_del_init_rcu(&ep->hlnode); + kfree(ep); + } + rmnet_unregister_real_device(real_dev); + unregister_netdevice_many(&list); + } else { + rmnet_unregister_bridge(port); } - - rcu_read_unlock(); - 
unregister_netdevice_many(&list); - - rmnet_unregister_real_device(real_dev, port); } static int rmnet_config_notify_cb(struct notifier_block *nb, unsigned long event, void *data) { - struct net_device *dev = netdev_notifier_info_to_dev(data); + struct net_device *real_dev = netdev_notifier_info_to_dev(data); - if (!dev) + if (!rmnet_is_real_dev_registered(real_dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UNREGISTER: - netdev_dbg(dev, "Kernel unregister\n"); - rmnet_force_unassociate_device(dev); + netdev_dbg(real_dev, "Kernel unregister\n"); + rmnet_force_unassociate_device(real_dev); break; default: @@ -295,16 +286,18 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (!dev) return -ENODEV; - real_dev = __dev_get_by_index(dev_net(dev), - nla_get_u32(tb[IFLA_LINK])); - - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + real_dev = priv->real_dev; + if (!rmnet_is_real_dev_registered(real_dev)) return -ENODEV; port = rmnet_get_port_rtnl(real_dev); if (data[IFLA_RMNET_MUX_ID]) { mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]); + if (rmnet_get_endpoint(port, mux_id)) { + NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists"); + return -EINVAL; + } ep = rmnet_get_endpoint(port, priv->mux_id); if (!ep) return -ENODEV; @@ -379,11 +372,10 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = { .fill_info = rmnet_fill_info, }; -/* Needs either rcu_read_lock() or rtnl lock */ -struct rmnet_port *rmnet_get_port(struct net_device *real_dev) +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev) { if (rmnet_is_real_dev_registered(real_dev)) - return rcu_dereference_rtnl(real_dev->rx_handler_data); + return rcu_dereference_bh(real_dev->rx_handler_data); else return NULL; } @@ -409,7 +401,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, struct rmnet_port *port, *slave_port; int err; - port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev); /* If there is more than one rmnet dev attached, its probably being * used for muxing. 
Skip the briding in that case @@ -417,6 +409,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (port->nr_rmnet_devs > 1) return -EINVAL; + if (port->rmnet_mode != RMNET_EPMODE_VND) + return -EINVAL; + if (rmnet_is_real_dev_registered(slave_dev)) return -EBUSY; @@ -424,9 +419,17 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY; - slave_port = rmnet_get_port(slave_dev); + err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, + extack); + if (err) { + rmnet_unregister_real_device(slave_dev); + return err; + } + + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; + slave_port->rmnet_dev = rmnet_dev; port->rmnet_mode = RMNET_EPMODE_BRIDGE; port->bridge_ep = slave_dev; @@ -438,16 +441,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev) { - struct rmnet_priv *priv = netdev_priv(rmnet_dev); - struct net_device *real_dev = priv->real_dev; - struct rmnet_port *port, *slave_port; + struct rmnet_port *port = rmnet_get_port_rtnl(slave_dev); - port = rmnet_get_port(real_dev); - port->rmnet_mode = RMNET_EPMODE_VND; - port->bridge_ep = NULL; - - slave_port = rmnet_get_port(slave_dev); - rmnet_unregister_real_device(slave_dev, slave_port); + rmnet_unregister_bridge(port); netdev_dbg(slave_dev, "removed from rmnet as slave\n"); return 0; @@ -473,8 +469,8 @@ static int __init rmnet_init(void) static void __exit rmnet_exit(void) { - unregister_netdevice_notifier(&rmnet_dev_notifier); rtnl_link_unregister(&rmnet_link_ops); + unregister_netdevice_notifier(&rmnet_dev_notifier); } module_init(rmnet_init) diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index cd0a6bcbe74a..be515982d628 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -28,6 +28,7 @@ struct rmnet_port { u8 rmnet_mode; struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; struct net_device *bridge_ep; + struct net_device *rmnet_dev; }; extern struct rtnl_link_ops rmnet_link_ops; @@ -65,7 +66,7 @@ struct rmnet_priv { struct rmnet_priv_stats stats; }; -struct rmnet_port *rmnet_get_port(struct net_device *real_dev); +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev); struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); int rmnet_add_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev, diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 1b74bc160402..29a7bfa2584d 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -159,6 +159,9 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, static void rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) { + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + if (bridge_dev) { skb->dev = bridge_dev; dev_queue_xmit(skb); @@ -184,7 +187,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) return RX_HANDLER_PASS; dev = skb->dev; - port = rmnet_get_port(dev); + port = rmnet_get_port_rcu(dev); switch (port->rmnet_mode) { case RMNET_EPMODE_VND: @@ -217,7 +220,7 @@ void rmnet_egress_handler(struct sk_buff *skb) skb->dev = priv->real_dev; mux_id = priv->mux_id; - port = rmnet_get_port(skb->dev); + port = 
rmnet_get_port_rcu(skb->dev); if (!port) goto drop; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 509dfc895a33..26ad40f19c64 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -266,14 +266,6 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, return 0; } -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) -{ - struct rmnet_priv *priv; - - priv = netdev_priv(rmnet_dev); - return priv->mux_id; -} - int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) { netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index 54cbaf3c3bc4..14d77c709d4a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -16,6 +16,5 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, struct rmnet_endpoint *ep); void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); void rmnet_vnd_setup(struct net_device *dev); #endif /* _RMNET_VND_H_ */ diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 67a4d5d45e3a..2d2d22f86dc6 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3865,15 +3865,18 @@ static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_12: case RTL_GIGA_MAC_VER_17: + pcie_set_readrq(tp->pci_dev, 512); r8168b_1_hw_jumbo_enable(tp); break; case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: + pcie_set_readrq(tp->pci_dev, 512); r8168c_hw_jumbo_enable(tp); break; case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28: r8168dp_hw_jumbo_enable(tp); break; case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_33: + pcie_set_readrq(tp->pci_dev, 512); r8168e_hw_jumbo_enable(tp); break; default: @@ -3903,6 +3906,9 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) break; } rtl_lock_config_regs(tp); + + if (pci_is_pcie(tp->pci_dev) && tp->supports_gmii) + pcie_set_readrq(tp->pci_dev, 4096); } static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu) @@ -6825,6 +6831,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int chipset, region; int jumbo_max, rc; + /* Some tools for creating an initramfs don't consider softdeps, then + * r8169.ko may be in initramfs, but realtek.ko not. Then the generic + * PHY driver is used that doesn't work with most chip versions. 
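*/

/* Editor's note: this probe-time guard pairs with a module soft
 * dependency. A hedged kernel-style fragment of both halves follows;
 * mainline r8169 does declare a softdep on realtek, but take the exact
 * spelling here as illustrative rather than a quotation.
 */
#include <linux/module.h>
#include <linux/device.h>
#include <linux/phy.h>

/* Tells modprobe and initramfs generators to load realtek.ko first. */
MODULE_SOFTDEP("pre: realtek");

/* driver_find() looks a driver up by name on a bus; "RTL8201CP
 * Ethernet" is one of the PHY drivers realtek.ko registers on the MDIO
 * bus, so a failed lookup implies the module is absent. */
static bool realtek_phy_driver_loaded(void)
{
	return driver_find("RTL8201CP Ethernet", &mdio_bus_type) != NULL;
}

/*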
+ */ + if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) { + dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n"); + return -ENOENT; + } + dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); if (!dev) return -ENOMEM; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 7a38d7f282a1..bc6c7f3f804d 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -525,6 +525,7 @@ efx_copy_channel(const struct efx_channel *old_channel) if (tx_queue->channel) tx_queue->channel = channel; tx_queue->buffer = NULL; + tx_queue->cb_page = NULL; memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); } diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index af15a737c675..59b4f16896a8 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -560,13 +560,45 @@ efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx, u32 nic_major, u32 nic_minor, s32 correction) { + u32 sync_timestamp; ktime_t kt = { 0 }; + s16 delta; if (!(nic_major & 0x80000000)) { WARN_ON_ONCE(nic_major >> 16); - /* Use the top bits from the latest sync event. */ - nic_major &= 0xffff; - nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000); + + /* Medford provides 48 bits of timestamp, so we must get the top + * 16 bits from the timesync event state. + * + * We only have the lower 16 bits of the time now, but we do + * have a full resolution timestamp at some point in past. As + * long as the difference between the (real) now and the sync + * is less than 2^15, then we can reconstruct the difference + * between those two numbers using only the lower 16 bits of + * each. + * + * Put another way + * + * a - b = ((a mod k) - b) mod k + * + * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know + * (a mod k) and b, so can calculate the delta, a - b. + * + */ + sync_timestamp = last_sync_timestamp_major(efx); + + /* Because delta is s16 this does an implicit mask down to + * 16 bits which is what we need, assuming + * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that + * we can deal with the (unlikely) case of sync timestamps + * arriving from the future. + */ + delta = nic_major - sync_timestamp; + + /* Recover the fully specified time now, by applying the offset + * to the (fully specified) sync time. 
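*/

/* Editor's note: a standalone worked example of the reconstruction
 * derived above, i.e. a - b = ((a mod k) - b) mod k for |a - b| < k/2
 * with k = 2^16. Plain C99; the int16_t cast assumes the usual
 * two's-complement reinterpretation, as the driver's s16 does.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t reconstruct(uint32_t sync_major, uint16_t now_low16)
{
	/* subtract in 16 bits, then reinterpret as signed */
	uint16_t raw = (uint16_t)(now_low16 - (uint16_t)sync_major);
	int16_t delta = (int16_t)raw;

	return sync_major + delta;
}

int main(void)
{
	/* "now" is 3 s past the sync point, across a 16-bit rollover:
	 * full value 0x00020001, of which only 0x0001 is on the wire */
	printf("0x%08x\n", reconstruct(0x0001fffeu, 0x0001)); /* 0x00020001 */

	/* a sync event slightly in the future also works: delta < 0 */
	printf("0x%08x\n", reconstruct(0x00020003u, 0x0001)); /* 0x00020001 */
	return 0;
}

/*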
+ */ + nic_major = sync_timestamp + delta; kt = ptp->nic_to_kernel_time(nic_major, nic_minor, correction); diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 8d88e4083456..7b65e79d6ae9 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 869a498e3b5e..56c0e643f430 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -929,7 +929,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) struct netsec_rx_pkt_info rx_info; enum dma_data_direction dma_dir; struct bpf_prog *xdp_prog; - struct sk_buff *skb = NULL; u16 xdp_xmit = 0; u32 xdp_act = 0; int done = 0; @@ -943,7 +942,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); struct netsec_desc *desc = &dring->desc[idx]; struct page *page = virt_to_page(desc->addr); - u32 xdp_result = XDP_PASS; + u32 xdp_result = NETSEC_XDP_PASS; + struct sk_buff *skb = NULL; u16 pkt_len, desc_len; dma_addr_t dma_handle; struct xdp_buff xdp; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 7ec895407d23..e0a5fe83d8e0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -413,6 +413,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) dll_lock = rgmii_readl(ethqos, SDC4_STATUS); if (dll_lock & SDC4_STATUS_DLL_LOCK) break; + retry--; } while (retry > 0); if (!retry) dev_err(ðqos->pdev->dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index d0356fbd1e43..542784300620 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -24,6 +24,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw, struct net_device *dev) { + struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONTROL); int mtu = dev->mtu; @@ -35,7 +36,7 @@ static void dwmac1000_core_init(struct mac_device_info *hw, * Broadcom tags can look like invalid LLC/SNAP packets and cause the * hardware to truncate packets on reception. 
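*/

/* Editor's note: the hunk below extends this to chips using normal
 * (non-enhanced) descriptors, whose RX path cannot account for the
 * stripped bytes. A minimal sketch of the resulting decision; the
 * GMAC_CONTROL_ACS value (bit 7, automatic pad/FCS stripping) is taken
 * from the dwmac1000 header and should be treated as an assumption here.
 */
#include <stdbool.h>
#include <stdint.h>

#define GMAC_CONTROL_ACS 0x00000080u

static uint32_t gmac_control_fixup(uint32_t value, bool uses_dsa,
				   bool enh_desc)
{
	/* Keep hardware pad/FCS stripping off when frames may carry a
	 * DSA tag (mistaken for bad LLC/SNAP) or when non-enhanced
	 * descriptors are in use. */
	if (uses_dsa || !enh_desc)
		value &= ~GMAC_CONTROL_ACS;
	return value;
}

int main(void)
{
	/* exits 0 iff ACS was cleared for the non-enhanced case */
	return gmac_control_fixup(GMAC_CONTROL_ACS, false, false) ? 1 : 0;
}

/*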
*/ - if (netdev_uses_dsa(dev)) + if (netdev_uses_dsa(dev) || !priv->plat->enh_desc) value &= ~GMAC_CONTROL_ACS; if (mtu > 1500) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 40ca00e596dd..53be936137d0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -420,7 +420,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, value |= GMAC_PACKET_FILTER_PM; /* Set all the bits of the HASH tab */ memset(mc_filter, 0xff, sizeof(mc_filter)); - } else if (!netdev_mc_empty(dev)) { + } else if (!netdev_mc_empty(dev) && (dev->flags & IFF_MULTICAST)) { struct netdev_hw_addr *ha; /* Hash filter for multicast */ @@ -736,11 +736,14 @@ static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, __le16 perfect_match, bool is_double) { void __iomem *ioaddr = hw->pcsr; + u32 value; writel(hash, ioaddr + GMAC_VLAN_HASH_TABLE); + value = readl(ioaddr + GMAC_VLAN_TAG); + if (hash) { - u32 value = GMAC_VLAN_VTHM | GMAC_VLAN_ETV; + value |= GMAC_VLAN_VTHM | GMAC_VLAN_ETV; if (is_double) { value |= GMAC_VLAN_EDVLP; value |= GMAC_VLAN_ESVL; @@ -759,8 +762,6 @@ static void dwmac4_update_vlan_hash(struct mac_device_info *hw, u32 hash, writel(value | perfect_match, ioaddr + GMAC_VLAN_TAG); } else { - u32 value = readl(ioaddr + GMAC_VLAN_TAG); - value &= ~(GMAC_VLAN_VTHM | GMAC_VLAN_ETV); value &= ~(GMAC_VLAN_EDVLP | GMAC_VLAN_ESVL); value &= ~GMAC_VLAN_DOVLTC; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 082f5ee9e525..e9bf54a579df 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -458,7 +458,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, for (i = 0; i < XGMAC_MAX_HASH_TABLE; i++) writel(~0x0, ioaddr + XGMAC_HASH_TABLE(i)); - } else if (!netdev_mc_empty(dev)) { + } else if (!netdev_mc_empty(dev) && (dev->flags & IFF_MULTICAST)) { struct netdev_hw_addr *ha; value |= XGMAC_FILTER_HMC; @@ -569,7 +569,9 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, writel(value, ioaddr + XGMAC_PACKET_FILTER); - value = XGMAC_VLAN_VTHM | XGMAC_VLAN_ETV; + value = readl(ioaddr + XGMAC_VLAN_TAG); + + value |= XGMAC_VLAN_VTHM | XGMAC_VLAN_ETV; if (is_double) { value |= XGMAC_VLAN_EDVLP; value |= XGMAC_VLAN_ESVL; @@ -584,7 +586,9 @@ static void dwxgmac2_update_vlan_hash(struct mac_device_info *hw, u32 hash, writel(value, ioaddr + XGMAC_PACKET_FILTER); - value = XGMAC_VLAN_ETV; + value = readl(ioaddr + XGMAC_VLAN_TAG); + + value |= XGMAC_VLAN_ETV; if (is_double) { value |= XGMAC_VLAN_EDVLP; value |= XGMAC_VLAN_ESVL; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 80d59b775907..caa4d4c687b9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4289,6 +4289,8 @@ static void stmmac_init_fs(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + rtnl_lock(); + /* Create per netdev entries */ priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir); @@ -4300,14 +4302,13 @@ static void stmmac_init_fs(struct net_device *dev) debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev, &stmmac_dma_cap_fops); - register_netdevice_notifier(&stmmac_notifier); + rtnl_unlock(); } static void stmmac_exit_fs(struct net_device *dev) { 
struct stmmac_priv *priv = netdev_priv(dev); - unregister_netdevice_notifier(&stmmac_notifier); debugfs_remove_recursive(priv->dbgfs_dir); } #endif /* CONFIG_DEBUG_FS */ @@ -4825,14 +4826,14 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); -#ifdef CONFIG_DEBUG_FS - stmmac_exit_fs(ndev); -#endif stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); +#ifdef CONFIG_DEBUG_FS + stmmac_exit_fs(ndev); +#endif phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); @@ -4860,6 +4861,7 @@ int stmmac_suspend(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + u32 chan; if (!ndev || !netif_running(ndev)) return 0; @@ -4873,6 +4875,9 @@ int stmmac_suspend(struct device *dev) stmmac_disable_all_queues(priv); + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) + del_timer_sync(&priv->tx_queue[chan].txtimer); + /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); @@ -5048,6 +5053,7 @@ static int __init stmmac_init(void) /* Create debugfs main directory if it doesn't exist yet */ if (!stmmac_fs_dir) stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL); + register_netdevice_notifier(&stmmac_notifier); #endif return 0; @@ -5056,6 +5062,7 @@ static int __init stmmac_init(void) static void __exit stmmac_exit(void) { #ifdef CONFIG_DEBUG_FS + unregister_netdevice_notifier(&stmmac_notifier); debugfs_remove_recursive(stmmac_fs_dir); #endif } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 8237dbc3e991..d2bc04dedd7c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -96,7 +96,7 @@ static int stmmac_default_data(struct pci_dev *pdev, plat->bus_id = 1; plat->phy_addr = 0; - plat->interface = PHY_INTERFACE_MODE_GMII; + plat->phy_interface = PHY_INTERFACE_MODE_GMII; plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; @@ -220,7 +220,8 @@ static int ehl_sgmii_data(struct pci_dev *pdev, { plat->bus_id = 1; plat->phy_addr = 0; - plat->interface = PHY_INTERFACE_MODE_SGMII; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + return ehl_common_data(pdev, plat); } @@ -233,7 +234,8 @@ static int ehl_rgmii_data(struct pci_dev *pdev, { plat->bus_id = 1; plat->phy_addr = 0; - plat->interface = PHY_INTERFACE_MODE_RGMII; + plat->phy_interface = PHY_INTERFACE_MODE_RGMII; + return ehl_common_data(pdev, plat); } @@ -261,7 +263,7 @@ static int tgl_sgmii_data(struct pci_dev *pdev, { plat->bus_id = 1; plat->phy_addr = 0; - plat->interface = PHY_INTERFACE_MODE_SGMII; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; return tgl_common_data(pdev, plat); } @@ -361,7 +363,7 @@ static int quark_default_data(struct pci_dev *pdev, plat->bus_id = pci_dev_id(pdev); plat->phy_addr = ret; - plat->interface = PHY_INTERFACE_MODE_RMII; + plat->phy_interface = PHY_INTERFACE_MODE_RMII; plat->dma_cfg->pbl = 16; plat->dma_cfg->pblx8 = true; @@ -418,7 +420,7 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, plat->bus_id = 1; plat->phy_addr = -1; - plat->interface = PHY_INTERFACE_MODE_GMII; + plat->phy_interface = PHY_INTERFACE_MODE_GMII; plat->dma_cfg->pbl = 32; plat->dma_cfg->pblx8 = true; diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 276292bca334..53fb8141f1a6 100644 --- 
a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -375,10 +375,14 @@ struct temac_local { int tx_bd_next; int tx_bd_tail; int rx_bd_ci; + int rx_bd_tail; /* DMA channel control setup */ u32 tx_chnl_ctrl; u32 rx_chnl_ctrl; + u8 coalesce_count_rx; + + struct delayed_work restart_work; }; /* Wrappers for temac_ior()/temac_iow() function pointers above */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 21c1b4322ea7..eb480204cdbe 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -367,6 +368,8 @@ static int temac_dma_bd_init(struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) + goto out; lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr); lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); @@ -387,12 +390,13 @@ static int temac_dma_bd_init(struct net_device *ndev) lp->tx_bd_next = 0; lp->tx_bd_tail = 0; lp->rx_bd_ci = 0; + lp->rx_bd_tail = RX_BD_NUM - 1; /* Enable RX DMA transfers */ wmb(); lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p); lp->dma_out(lp, RX_TAILDESC_PTR, - lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); + lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail)); /* Prepare for TX DMA transfer */ lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p); @@ -788,6 +792,9 @@ static void temac_start_xmit_done(struct net_device *ndev) stat = be32_to_cpu(cur_p->app0); } + /* Matches barrier in temac_start_xmit */ + smp_mb(); + netif_wake_queue(ndev); } @@ -830,9 +837,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (temac_check_tx_bd_space(lp, num_frag + 1)) { - if (!netif_queue_stopped(ndev)) - netif_stop_queue(ndev); - return NETDEV_TX_BUSY; + if (netif_queue_stopped(ndev)) + return NETDEV_TX_BUSY; + + netif_stop_queue(ndev); + + /* Matches barrier in temac_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (temac_check_tx_bd_space(lp, num_frag)) + return NETDEV_TX_BUSY; + + netif_wake_queue(ndev); } cur_p->app0 = 0; @@ -850,12 +867,16 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, skb_headlen(skb), DMA_TO_DEVICE); cur_p->len = cpu_to_be32(skb_headlen(skb)); + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) { + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; + } cur_p->phys = cpu_to_be32(skb_dma_addr); ptr_to_txbd((void *)skb, cur_p); for (ii = 0; ii < num_frag; ii++) { - lp->tx_bd_tail++; - if (lp->tx_bd_tail >= TX_BD_NUM) + if (++lp->tx_bd_tail >= TX_BD_NUM) lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; @@ -863,6 +884,27 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_frag_address(frag), skb_frag_size(frag), DMA_TO_DEVICE); + if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) { + if (--lp->tx_bd_tail < 0) + lp->tx_bd_tail = TX_BD_NUM - 1; + cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; + while (--ii >= 0) { + --frag; + dma_unmap_single(ndev->dev.parent, + be32_to_cpu(cur_p->phys), + skb_frag_size(frag), + DMA_TO_DEVICE); + if (--lp->tx_bd_tail < 0) + lp->tx_bd_tail = TX_BD_NUM - 1; + cur_p = 
&lp->tx_bd_v[lp->tx_bd_tail]; + } + dma_unmap_single(ndev->dev.parent, + be32_to_cpu(cur_p->phys), + skb_headlen(skb), DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; + } cur_p->phys = cpu_to_be32(skb_dma_addr); cur_p->len = cpu_to_be32(skb_frag_size(frag)); cur_p->app0 = 0; @@ -884,31 +926,56 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } +static int ll_temac_recv_buffers_available(struct temac_local *lp) +{ + int available; + + if (!lp->rx_skb[lp->rx_bd_ci]) + return 0; + available = 1 + lp->rx_bd_tail - lp->rx_bd_ci; + if (available <= 0) + available += RX_BD_NUM; + return available; +} static void ll_temac_recv(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); - struct sk_buff *skb, *new_skb; - unsigned int bdstat; - struct cdmac_bd *cur_p; - dma_addr_t tail_p, skb_dma_addr; - int length; unsigned long flags; + int rx_bd; + bool update_tail = false; spin_lock_irqsave(&lp->rx_lock, flags); - tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - - bdstat = be32_to_cpu(cur_p->app0); - while ((bdstat & STS_CTRL_APP0_CMPLT)) { + /* Process all received buffers, passing them on network + * stack. After this, the buffer descriptors will be in an + * un-allocated stage, where no skb is allocated for it, and + * they are therefore not available for TEMAC/DMA. + */ + do { + struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci]; + struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci]; + unsigned int bdstat = be32_to_cpu(bd->app0); + int length; + + /* While this should not normally happen, we can end + * here when GFP_ATOMIC allocations fail, and we + * therefore have un-allocated buffers. + */ + if (!skb) + break; - skb = lp->rx_skb[lp->rx_bd_ci]; - length = be32_to_cpu(cur_p->app4) & 0x3FFF; + /* Loop over all completed buffer descriptors */ + if (!(bdstat & STS_CTRL_APP0_CMPLT)) + break; - dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), + dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys), XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + /* The buffer is not valid for DMA anymore */ + bd->phys = 0; + bd->len = 0; + length = be32_to_cpu(bd->app4) & 0x3FFF; skb_put(skb, length); skb->protocol = eth_type_trans(skb, ndev); skb_checksum_none_assert(skb); @@ -923,43 +990,102 @@ static void ll_temac_recv(struct net_device *ndev) * (back) for proper IP checksum byte order * (be16). */ - skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF); + skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF); skb->ip_summed = CHECKSUM_COMPLETE; } if (!skb_defer_rx_timestamp(skb)) netif_rx(skb); + /* The skb buffer is now owned by network stack above */ + lp->rx_skb[lp->rx_bd_ci] = NULL; ndev->stats.rx_packets++; ndev->stats.rx_bytes += length; - new_skb = netdev_alloc_skb_ip_align(ndev, - XTE_MAX_JUMBO_FRAME_SIZE); - if (!new_skb) { - spin_unlock_irqrestore(&lp->rx_lock, flags); - return; + rx_bd = lp->rx_bd_ci; + if (++lp->rx_bd_ci >= RX_BD_NUM) + lp->rx_bd_ci = 0; + } while (rx_bd != lp->rx_bd_tail); + + /* DMA operations will halt when the last buffer descriptor is + * processed (ie. the one pointed to by RX_TAILDESC_PTR). + * When that happens, no more interrupt events will be + * generated. No IRQ_COAL or IRQ_DLY, and not even an + * IRQ_ERR. To avoid stalling, we schedule a delayed work + * when there is a potential risk of that happening. The work + * will call this function, and thus re-schedule itself until + * enough buffers are available again. 
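*/

/* Editor's note: the stall window above follows from simple ring
 * arithmetic. A standalone model of ll_temac_recv_buffers_available()
 * with self-checking examples; RX_BD_NUM is 64 here purely for
 * illustration.
 */
#include <assert.h>

#define RX_BD_NUM 64

/* Descriptors still armed for DMA, given the consumer index ci and the
 * last armed descriptor tail, in a circular ring. Zero once the skb at
 * ci could not be allocated. */
static int buffers_available(int ci, int tail, int have_skb_at_ci)
{
	int available;

	if (!have_skb_at_ci)
		return 0;
	available = 1 + tail - ci;
	if (available <= 0)
		available += RX_BD_NUM;
	return available;
}

int main(void)
{
	assert(buffers_available(0, RX_BD_NUM - 1, 1) == RX_BD_NUM);
	assert(buffers_available(10, 9, 1) == RX_BD_NUM); /* wrapped, full */
	assert(buffers_available(10, 12, 1) == 3);
	assert(buffers_available(10, 12, 0) == 0);
	/* Once this drops below coalesce_count_rx, the hardware may
	 * consume the remaining descriptors without raising another
	 * interrupt, hence the delayed-work safety net below. */
	return 0;
}

/*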
+ */ + if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx) + schedule_delayed_work(&lp->restart_work, HZ / 1000); + + /* Allocate new buffers for those buffer descriptors that were + * passed to network stack. Note that GFP_ATOMIC allocations + * can fail (e.g. when a larger burst of GFP_ATOMIC + * allocations occurs), so while we try to allocate all + * buffers in the same interrupt where they were processed, we + * continue with what we could get in case of allocation + * failure. Allocation of remaining buffers will be retried + * in following calls. + */ + while (1) { + struct sk_buff *skb; + struct cdmac_bd *bd; + dma_addr_t skb_dma_addr; + + rx_bd = lp->rx_bd_tail + 1; + if (rx_bd >= RX_BD_NUM) + rx_bd = 0; + bd = &lp->rx_bd_v[rx_bd]; + + if (bd->phys) + break; /* All skb's allocated */ + + skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE); + if (!skb) { + dev_warn(&ndev->dev, "skb alloc failed\n"); + break; } - cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); - skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data, + skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data, XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); - cur_p->phys = cpu_to_be32(skb_dma_addr); - cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); - lp->rx_skb[lp->rx_bd_ci] = new_skb; + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, + skb_dma_addr))) { + dev_kfree_skb_any(skb); + break; + } - lp->rx_bd_ci++; - if (lp->rx_bd_ci >= RX_BD_NUM) - lp->rx_bd_ci = 0; + bd->phys = cpu_to_be32(skb_dma_addr); + bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE); + bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND); + lp->rx_skb[rx_bd] = skb; - cur_p = &lp->rx_bd_v[lp->rx_bd_ci]; - bdstat = be32_to_cpu(cur_p->app0); + lp->rx_bd_tail = rx_bd; + update_tail = true; + } + + /* Move tail pointer when buffers have been allocated */ + if (update_tail) { + lp->dma_out(lp, RX_TAILDESC_PTR, + lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail); } - lp->dma_out(lp, RX_TAILDESC_PTR, tail_p); spin_unlock_irqrestore(&lp->rx_lock, flags); } +/* Function scheduled to ensure a restart in case of DMA halt + * condition caused by running out of buffer descriptors. 
+ */ +static void ll_temac_restart_work_func(struct work_struct *work) +{ + struct temac_local *lp = container_of(work, struct temac_local, + restart_work.work); + struct net_device *ndev = lp->ndev; + + ll_temac_recv(ndev); +} + static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev) { struct net_device *ndev = _ndev; @@ -1052,6 +1178,8 @@ static int temac_stop(struct net_device *ndev) dev_dbg(&ndev->dev, "temac_close()\n"); + cancel_delayed_work_sync(&lp->restart_work); + free_irq(lp->tx_irq, ndev); free_irq(lp->rx_irq, ndev); @@ -1184,6 +1312,7 @@ static int temac_probe(struct platform_device *pdev) lp->dev = &pdev->dev; lp->options = XTE_OPTION_DEFAULTS; spin_lock_init(&lp->rx_lock); + INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func); /* Setup mutex for synchronization of indirect register access */ if (pdata) { @@ -1290,6 +1419,7 @@ static int temac_probe(struct platform_device *pdev) */ lp->tx_chnl_ctrl = 0x10220000; lp->rx_chnl_ctrl = 0xff070000; + lp->coalesce_count_rx = 0x07; /* Finished with the DMA node; drop the reference */ of_node_put(dma_np); @@ -1321,11 +1451,14 @@ static int temac_probe(struct platform_device *pdev) (pdata->tx_irq_count << 16); else lp->tx_chnl_ctrl = 0x10220000; - if (pdata->rx_irq_timeout || pdata->rx_irq_count) + if (pdata->rx_irq_timeout || pdata->rx_irq_count) { lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) | (pdata->rx_irq_count << 16); - else + lp->coalesce_count_rx = pdata->rx_irq_count; + } else { lp->rx_chnl_ctrl = 0xff070000; + lp->coalesce_count_rx = 0x07; + } } /* Error handle returned DMA RX and TX interrupts */ diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 6fc04ffb22c2..d4e095d0e8f1 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -517,25 +517,14 @@ static int ixp4xx_mdio_write(struct mii_bus *bus, int phy_id, int location, return ret; } -static int ixp4xx_mdio_register(void) +static int ixp4xx_mdio_register(struct eth_regs __iomem *regs) { int err; if (!(mdio_bus = mdiobus_alloc())) return -ENOMEM; - if (cpu_is_ixp43x()) { - /* IXP43x lacks NPE-B and uses NPE-C for MII PHY access */ - if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEC_ETH)) - return -ENODEV; - mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; - } else { - /* All MII PHY accesses use NPE-B Ethernet registers */ - if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEB_ETH0)) - return -ENODEV; - mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; - } - + mdio_regs = regs; __raw_writel(DEFAULT_CORE_CNTRL, &mdio_regs->core_control); spin_lock_init(&mdio_lock); mdio_bus->name = "IXP4xx MII Bus"; @@ -1374,7 +1363,7 @@ static const struct net_device_ops ixp4xx_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static int eth_init_one(struct platform_device *pdev) +static int ixp4xx_eth_probe(struct platform_device *pdev) { struct port *port; struct net_device *dev; @@ -1384,7 +1373,7 @@ static int eth_init_one(struct platform_device *pdev) char phy_id[MII_BUS_ID_SIZE + 3]; int err; - if (!(dev = alloc_etherdev(sizeof(struct port)))) + if (!(dev = devm_alloc_etherdev(&pdev->dev, sizeof(struct port)))) return -ENOMEM; SET_NETDEV_DEV(dev, &pdev->dev); @@ -1394,20 +1383,51 @@ static int eth_init_one(struct platform_device *pdev) switch (port->id) { case IXP4XX_ETH_NPEA: + /* If the MDIO bus is not up yet, defer probe */ + if (!mdio_bus) + return -EPROBE_DEFER; port->regs = (struct eth_regs __iomem *)IXP4XX_EthA_BASE_VIRT; 
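	/* A minimal sketch of the probe-deferral pattern used by the NPE
	 * cases in this switch (illustrative only; "foo_probe" and
	 * "shared_resource" are hypothetical, not part of this patch):
	 *
	 *	static int foo_probe(struct platform_device *pdev)
	 *	{
	 *		if (!shared_resource)	  // here: the global mdio_bus
	 *			return -EPROBE_DEFER; // driver core retries later
	 *		...
	 *	}
	 *
	 * The driver core re-runs deferred probes whenever another device
	 * binds, so the port that registers the MDIO bus unblocks the rest.
	 */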
regs_phys = IXP4XX_EthA_BASE_PHYS; break; case IXP4XX_ETH_NPEB: + /* + * On all except IXP43x, NPE-B is used for the MDIO bus. + * If there is no NPE-B in the feature set, bail out, else + * register the MDIO bus. + */ + if (!cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEB_ETH0)) + return -ENODEV; + /* Else register the MDIO bus on NPE-B */ + if ((err = ixp4xx_mdio_register(IXP4XX_EthB_BASE_VIRT))) + return err; + } + if (!mdio_bus) + return -EPROBE_DEFER; port->regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT; regs_phys = IXP4XX_EthB_BASE_PHYS; break; case IXP4XX_ETH_NPEC: + /* + * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access, + * if there is no NPE-C, no bus, nothing works, so bail out. + */ + if (cpu_is_ixp43x()) { + if (!(ixp4xx_read_feature_bits() & + IXP4XX_FEATURE_NPEC_ETH)) + return -ENODEV; + /* Else register the MDIO bus on NPE-C */ + if ((err = ixp4xx_mdio_register(IXP4XX_EthC_BASE_VIRT))) + return err; + } + if (!mdio_bus) + return -EPROBE_DEFER; port->regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT; regs_phys = IXP4XX_EthC_BASE_PHYS; break; default: - err = -ENODEV; - goto err_free; + return -ENODEV; } dev->netdev_ops = &ixp4xx_netdev_ops; @@ -1416,10 +1436,8 @@ static int eth_init_one(struct platform_device *pdev) netif_napi_add(dev, &port->napi, eth_poll, NAPI_WEIGHT); - if (!(port->npe = npe_request(NPE_ID(port->id)))) { - err = -EIO; - goto err_free; - } + if (!(port->npe = npe_request(NPE_ID(port->id)))) + return -EIO; port->mem_res = request_mem_region(regs_phys, REGS_SIZE, dev->name); if (!port->mem_res) { @@ -1465,12 +1483,10 @@ static int eth_init_one(struct platform_device *pdev) release_resource(port->mem_res); err_npe_rel: npe_release(port->npe); -err_free: - free_netdev(dev); return err; } -static int eth_remove_one(struct platform_device *pdev) +static int ixp4xx_eth_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct phy_device *phydev = dev->phydev; @@ -1478,45 +1494,21 @@ static int eth_remove_one(struct platform_device *pdev) unregister_netdev(dev); phy_disconnect(phydev); + ixp4xx_mdio_remove(); npe_port_tab[NPE_ID(port->id)] = NULL; npe_release(port->npe); release_resource(port->mem_res); - free_netdev(dev); return 0; } static struct platform_driver ixp4xx_eth_driver = { .driver.name = DRV_NAME, - .probe = eth_init_one, - .remove = eth_remove_one, + .probe = ixp4xx_eth_probe, + .remove = ixp4xx_eth_remove, }; - -static int __init eth_init_module(void) -{ - int err; - - /* - * FIXME: we bail out on device tree boot but this really needs - * to be fixed in a nicer way: this registers the MDIO bus before - * even matching the driver infrastructure, we should only probe - * detected hardware. 
- */ - if (of_have_populated_dt()) - return -ENODEV; - if ((err = ixp4xx_mdio_register())) - return err; - return platform_driver_register(&ixp4xx_eth_driver); -} - -static void __exit eth_cleanup_module(void) -{ - platform_driver_unregister(&ixp4xx_eth_driver); - ixp4xx_mdio_remove(); -} +module_platform_driver(ixp4xx_eth_driver); MODULE_AUTHOR("Krzysztof Halasa"); MODULE_DESCRIPTION("Intel IXP4xx Ethernet driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:ixp4xx_eth"); -module_init(eth_init_module); -module_exit(eth_cleanup_module); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 9b3ba98726d7..3a53d222bfcc 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -767,12 +767,12 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) int i; gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (gtp->addr_hash == NULL) return -ENOMEM; gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (gtp->tid_hash == NULL) goto err1; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index eab83e71567a..6c0732fc8c25 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -99,7 +99,7 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; - net_device->tx_disable = false; + net_device->tx_disable = true; net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f3f9eb8a402a..ee1ad7ae7555 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -977,6 +977,7 @@ static int netvsc_attach(struct net_device *ndev, } /* In any case device is now ready */ + nvdev->tx_disable = false; netif_device_attach(ndev); /* Note: enable and attach happen when sub-channels setup */ @@ -2354,6 +2355,8 @@ static int netvsc_probe(struct hv_device *dev, else net->max_mtu = ETH_DATA_LEN; + nvdev->tx_disable = false; + ret = register_netdevice(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 30cd0c4f0be0..8801d093135c 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -293,6 +293,7 @@ void ipvlan_process_multicast(struct work_struct *work) } if (dev) dev_put(dev); + cond_resched(); } } @@ -498,19 +499,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb) struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; - /* In this mode we dont care about multicast and broadcast traffic */ - if (is_multicast_ether_addr(ethh->h_dest)) { - pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", - ntohs(skb->protocol)); - kfree_skb(skb); - goto out; - } - /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. 
*/ if (skb_mac_header_was_set(skb)) { + /* In this mode we don't care about + * multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index a70662261a5a..f195f278a83a 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -164,7 +164,6 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -178,7 +177,7 @@ static int ipvlan_open(struct net_device *dev) ipvlan_ht_addr_add(ipvlan, addr); rcu_read_unlock(); - return dev_uc_add(phy_dev, phy_dev->dev_addr); + return 0; } static int ipvlan_stop(struct net_device *dev) @@ -190,8 +189,6 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - dev_uc_del(phy_dev, phy_dev->dev_addr); - rcu_read_lock(); list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index afd8b2a08245..57ef24546e96 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -532,6 +532,11 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) return (struct macsec_eth_header *)skb_mac_header(skb); } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static u32 tx_sa_update_pn(struct macsec_tx_sa *tx_sa, struct macsec_secy *secy) { u32 pn; @@ -2903,6 +2908,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) out: ether_addr_copy(dev->dev_addr, addr->sa_data); + macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); return 0; } @@ -2977,6 +2983,7 @@ static const struct device_type macsec_type = { static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, + [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, @@ -3176,11 +3183,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } -static sci_t dev_to_sci(struct net_device *dev, __be16 port) -{ - return make_sci(dev->dev_addr, port); -} - static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c5bf61565726..26f6be4796c7 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -334,6 +334,8 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); consume_skb(skb); + + cond_resched(); } } diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 6aeed0c600f8..7971dc4f54f1 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -17,6 +17,7 @@ static DEFINE_IDA(nsim_bus_dev_ids); static LIST_HEAD(nsim_bus_dev_list); static DEFINE_MUTEX(nsim_bus_dev_list_lock); +static bool nsim_bus_enable; static struct nsim_bus_dev *to_nsim_bus_dev(struct device *dev) { @@ -28,7 +29,7 @@ static int nsim_bus_dev_vfs_enable(struct 
nsim_bus_dev *nsim_bus_dev, { nsim_bus_dev->vfconfigs = kcalloc(num_vfs, sizeof(struct nsim_vf_config), - GFP_KERNEL); + GFP_KERNEL | __GFP_NOWARN); if (!nsim_bus_dev->vfconfigs) return -ENOMEM; nsim_bus_dev->num_vfs = num_vfs; @@ -96,13 +97,25 @@ new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + struct nsim_dev *nsim_dev = dev_get_drvdata(dev); + struct devlink *devlink; unsigned int port_index; int ret; + /* Prevent use of nsim_bus_dev before initialization. */ + if (!smp_load_acquire(&nsim_bus_dev->init)) + return -EBUSY; ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; + + devlink = priv_to_devlink(nsim_dev); + + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + devlink_reload_disable(devlink); ret = nsim_dev_port_add(nsim_bus_dev, port_index); + devlink_reload_enable(devlink); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -113,13 +126,25 @@ del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); + struct nsim_dev *nsim_dev = dev_get_drvdata(dev); + struct devlink *devlink; unsigned int port_index; int ret; + /* Prevent use of nsim_bus_dev before initialization. */ + if (!smp_load_acquire(&nsim_bus_dev->init)) + return -EBUSY; ret = kstrtouint(buf, 0, &port_index); if (ret) return ret; + + devlink = priv_to_devlink(nsim_dev); + + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + devlink_reload_disable(devlink); ret = nsim_dev_port_del(nsim_bus_dev, port_index); + devlink_reload_enable(devlink); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -179,15 +204,30 @@ new_device_store(struct bus_type *bus, const char *buf, size_t count) pr_err("Format for adding new device is \"id port_count\" (uint uint).\n"); return -EINVAL; } - nsim_bus_dev = nsim_bus_dev_new(id, port_count); - if (IS_ERR(nsim_bus_dev)) - return PTR_ERR(nsim_bus_dev); mutex_lock(&nsim_bus_dev_list_lock); + /* Prevent use of resource before initialization. */ + if (!smp_load_acquire(&nsim_bus_enable)) { + err = -EBUSY; + goto err; + } + + nsim_bus_dev = nsim_bus_dev_new(id, port_count); + if (IS_ERR(nsim_bus_dev)) { + err = PTR_ERR(nsim_bus_dev); + goto err; + } + + /* Allow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, true); + list_add_tail(&nsim_bus_dev->list, &nsim_bus_dev_list); mutex_unlock(&nsim_bus_dev_list_lock); return count; +err: + mutex_unlock(&nsim_bus_dev_list_lock); + return err; } static BUS_ATTR_WO(new_device); @@ -215,6 +255,11 @@ del_device_store(struct bus_type *bus, const char *buf, size_t count) err = -ENOENT; mutex_lock(&nsim_bus_dev_list_lock); + /* Prevent use of resource before initialization. 
*/ + if (!smp_load_acquire(&nsim_bus_enable)) { + mutex_unlock(&nsim_bus_dev_list_lock); + return -EBUSY; + } list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { if (nsim_bus_dev->dev.id != id) continue; @@ -284,6 +329,9 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) nsim_bus_dev->dev.type = &nsim_bus_dev_type; nsim_bus_dev->port_count = port_count; nsim_bus_dev->initial_net = current->nsproxy->net_ns; + mutex_init(&nsim_bus_dev->nsim_bus_reload_lock); + /* Disallow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, false); err = device_register(&nsim_bus_dev->dev); if (err) @@ -299,6 +347,8 @@ nsim_bus_dev_new(unsigned int id, unsigned int port_count) static void nsim_bus_dev_del(struct nsim_bus_dev *nsim_bus_dev) { + /* Disallow using nsim_bus_dev */ + smp_store_release(&nsim_bus_dev->init, false); device_unregister(&nsim_bus_dev->dev); ida_free(&nsim_bus_dev_ids, nsim_bus_dev->dev.id); kfree(nsim_bus_dev); @@ -320,6 +370,8 @@ int nsim_bus_init(void) err = driver_register(&nsim_driver); if (err) goto err_bus_unregister; + /* Allow using resources */ + smp_store_release(&nsim_bus_enable, true); return 0; err_bus_unregister: @@ -331,12 +383,16 @@ void nsim_bus_exit(void) { struct nsim_bus_dev *nsim_bus_dev, *tmp; + /* Disallow using resources */ + smp_store_release(&nsim_bus_enable, false); + mutex_lock(&nsim_bus_dev_list_lock); list_for_each_entry_safe(nsim_bus_dev, tmp, &nsim_bus_dev_list, list) { list_del(&nsim_bus_dev->list); nsim_bus_dev_del(nsim_bus_dev); } mutex_unlock(&nsim_bus_dev_list_lock); + driver_unregister(&nsim_driver); bus_unregister(&nsim_bus); } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 4b39aba2e9c4..54bc089550b3 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -73,7 +73,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = { static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { - char dev_ddir_name[16]; + char dev_ddir_name[sizeof(DRV_NAME) + 10]; sprintf(dev_ddir_name, DRV_NAME "%u", nsim_dev->nsim_bus_dev->dev.id); nsim_dev->ddir = debugfs_create_dir(dev_ddir_name, nsim_dev_ddir); @@ -88,8 +88,11 @@ static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) &nsim_dev->max_macs); debugfs_create_bool("test1", 0600, nsim_dev->ddir, &nsim_dev->test1); - debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, - &nsim_dev_take_snapshot_fops); + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", + 0200, + nsim_dev->ddir, + nsim_dev, + &nsim_dev_take_snapshot_fops); debugfs_create_bool("dont_allow_reload", 0600, nsim_dev->ddir, &nsim_dev->dont_allow_reload); debugfs_create_bool("fail_reload", 0600, nsim_dev->ddir, @@ -740,6 +743,11 @@ static int nsim_dev_reload_create(struct nsim_dev *nsim_dev, if (err) goto err_health_exit; + nsim_dev->take_snapshot = debugfs_create_file("take_snapshot", + 0200, + nsim_dev->ddir, + nsim_dev, + &nsim_dev_take_snapshot_fops); return 0; err_health_exit: @@ -853,6 +861,7 @@ static void nsim_dev_reload_destroy(struct nsim_dev *nsim_dev) if (devlink_is_reload_failed(devlink)) return; + debugfs_remove(nsim_dev->take_snapshot); nsim_dev_port_del_all(nsim_dev); nsim_dev_health_exit(nsim_dev); nsim_dev_traps_exit(devlink); diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c index 9aa637d162eb..c06e0f8fbc10 100644 --- a/drivers/net/netdevsim/health.c +++ b/drivers/net/netdevsim/health.c @@ -82,7 +82,7 @@ static int nsim_dev_dummy_fmsg_put(struct devlink_fmsg *fmsg, u32 
binary_len) if (err) return err; - binary = kmalloc(binary_len, GFP_KERNEL); + binary = kmalloc(binary_len, GFP_KERNEL | __GFP_NOWARN); if (!binary) return -ENOMEM; get_random_bytes(binary, binary_len); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 94df795ef4d3..2eb7b0dc1594 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -160,6 +160,7 @@ struct nsim_dev { struct nsim_trap_data *trap_data; struct dentry *ddir; struct dentry *ports_ddir; + struct dentry *take_snapshot; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; u32 bpf_bind_verifier_delay; @@ -240,6 +241,9 @@ struct nsim_bus_dev { */ unsigned int num_vfs; struct nsim_vf_config *vfconfigs; + /* Lock for devlink->reload_enabled in netdevsim module */ + struct mutex nsim_bus_reload_lock; + bool init; }; int nsim_bus_init(void); diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index aee62610bade..481cf48c9b9e 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -489,6 +489,14 @@ static int at803x_probe(struct phy_device *phydev) return at803x_parse_dt(phydev); } +static void at803x_remove(struct phy_device *phydev) +{ + struct at803x_priv *priv = phydev->priv; + + if (priv->vddio) + regulator_disable(priv->vddio); +} + static int at803x_clk_out_config(struct phy_device *phydev) { struct at803x_priv *priv = phydev->priv; @@ -711,6 +719,7 @@ static struct phy_driver at803x_driver[] = { .name = "Qualcomm Atheros AR8035", .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, + .remove = at803x_remove, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, @@ -726,6 +735,7 @@ static struct phy_driver at803x_driver[] = { .name = "Qualcomm Atheros AR8030", .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, + .remove = at803x_remove, .config_init = at803x_config_init, .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, @@ -741,6 +751,7 @@ static struct phy_driver at803x_driver[] = { .name = "Qualcomm Atheros AR8031/AR8033", .phy_id_mask = AT803X_PHY_ID_MASK, .probe = at803x_probe, + .remove = at803x_remove, .config_init = at803x_config_init, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c index 23f1958ba6ad..459fb2069c7e 100644 --- a/drivers/net/phy/bcm63xx.c +++ b/drivers/net/phy/bcm63xx.c @@ -73,6 +73,7 @@ static struct phy_driver bcm63xx_driver[] = { /* same phy as above, with just a different OUI */ .phy_id = 0x002bdc00, .phy_id_mask = 0xfffffc00, + .name = "Broadcom BCM63XX (2)", /* PHY_BASIC_FEATURES */ .flags = PHY_IS_INTERNAL, .config_init = bcm63xx_config_init, diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 7c5265fd2b94..4190f9ed5313 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -212,16 +212,13 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np) */ gpiod = gpiod_get_from_of_node(fixed_link_node, "link-gpios", 0, GPIOD_IN, "mdio"); - of_node_put(fixed_link_node); - if (IS_ERR(gpiod)) { - if (PTR_ERR(gpiod) == -EPROBE_DEFER) - return gpiod; - + if (IS_ERR(gpiod) && PTR_ERR(gpiod) != -EPROBE_DEFER) { if (PTR_ERR(gpiod) != -ENOENT) pr_err("error getting GPIO for fixed link %pOF, proceed without\n", fixed_link_node); gpiod = NULL; } + of_node_put(fixed_link_node); return gpiod; } diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 7e9975d25066..f1ded03f0229 100644 
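The fixed_phy hunk above moves of_node_put() after the error checks, so the node is released only once its last user (the %pOF error print) is done, while -EPROBE_DEFER is still propagated unchanged. A minimal sketch of that optional-GPIO pattern; the helper name is hypothetical, and the node is assumed to have been acquired with of_get_child_by_name() (hence the put):

	#include <linux/err.h>
	#include <linux/gpio/consumer.h>
	#include <linux/of.h>

	static struct gpio_desc *get_optional_link_gpio(struct device_node *np)
	{
		struct gpio_desc *gpiod;

		gpiod = gpiod_get_from_of_node(np, "link-gpios", 0,
					       GPIOD_IN, "mdio");
		if (IS_ERR(gpiod) && PTR_ERR(gpiod) != -EPROBE_DEFER) {
			if (PTR_ERR(gpiod) != -ENOENT)
				pr_err("error getting GPIO for %pOF\n", np);
			gpiod = NULL;	/* no usable GPIO: carry on without one */
		}
		of_node_put(np);	/* drop the reference only after its last use */
		return gpiod;
	}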
--- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -178,6 +178,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -188,6 +205,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index d5f8f351d9ef..3e38d15a67c6 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -310,11 +310,11 @@ enum rgmii_rx_clock_delay { BIT(VSC8531_FORCE_LED_OFF) | \ BIT(VSC8531_FORCE_LED_ON)) -#define MSCC_VSC8584_REVB_INT8051_FW "mscc_vsc8584_revb_int8051_fb48.bin" +#define MSCC_VSC8584_REVB_INT8051_FW "microchip/mscc_vsc8584_revb_int8051_fb48.bin" #define MSCC_VSC8584_REVB_INT8051_FW_START_ADDR 0xe800 #define MSCC_VSC8584_REVB_INT8051_FW_CRC 0xfb48 -#define MSCC_VSC8574_REVB_INT8051_FW "mscc_vsc8574_revb_int8051_29e8.bin" +#define MSCC_VSC8574_REVB_INT8051_FW "microchip/mscc_vsc8574_revb_int8051_29e8.bin" #define MSCC_VSC8574_REVB_INT8051_FW_START_ADDR 0x4000 #define MSCC_VSC8574_REVB_INT8051_FW_CRC 0x29e8 diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 80be4d691e5b..6f6ebe908ed8 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -702,7 +702,8 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) phy_trigger_machine(phydev); } - if (phy_clear_interrupt(phydev)) + /* did_interrupt() may have cleared the interrupt already */ + if (!phydev->drv->did_interrupt && phy_clear_interrupt(phydev)) goto phy_err; return IRQ_HANDLED; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b13c52873ef5..fe34e85c0e16 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -247,7 +247,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) * MDIO bus driver and clock gated at this point. 
*/ if (!netdev) - return !phydev->suspended; + goto out; if (netdev->wol_enabled) return false; @@ -267,7 +267,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) @@ -285,6 +286,8 @@ static int mdio_bus_phy_suspend(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) return 0; + phydev->suspended_by_mdio_bus = 1; + return phy_suspend(phydev); } @@ -293,9 +296,11 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev)) + if (!phydev->suspended_by_mdio_bus) goto no_resume; + phydev->suspended_by_mdio_bus = 0; + ret = phy_resume(phydev); if (ret < 0) return ret; diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 476db5345e1a..879ca37c8508 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -171,7 +171,9 @@ static int rtl8211c_config_init(struct phy_device *phydev) static int rtl8211f_config_init(struct phy_device *phydev) { + struct device *dev = &phydev->mdio.dev; u16 val; + int ret; /* enable TX-delay for rgmii-{id,txid}, and disable it for rgmii and * rgmii-rxid. The RX-delay can be enabled by the external RXDLY pin. @@ -189,7 +191,22 @@ static int rtl8211f_config_init(struct phy_device *phydev) return 0; } - return phy_modify_paged(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY, val); + ret = phy_modify_paged_changed(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY, + val); + if (ret < 0) { + dev_err(dev, "Failed to update the TX delay register\n"); + return ret; + } else if (ret) { + dev_dbg(dev, + "%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n", + val ? "Enabling" : "Disabling"); + } else { + dev_dbg(dev, + "2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n", + val ? "enabled" : "disabled"); + } + + return 0; } static int rtl8211e_config_init(struct phy_device *phydev) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index a7b9cf3269bf..29a0917a81e6 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; - ap->rpkt = skb; - } - if (skb->len == 0) { - /* Try to get the payload 4-byte aligned. - * This should match the - * PPP_ALLSTATIONS/PPP_UI/compressed tests in - * process_input_packet, but we do not have - * enough chars here to test buf[1] and buf[2]. - */ + ap->rpkt = skb; + } + if (skb->len == 0) { + /* Try to get the payload 4-byte aligned. + * This should match the + * PPP_ALLSTATIONS/PPP_UI/compressed tests in + * process_input_packet, but we do not have + * enough chars here to test buf[1] and buf[2]. 
+ */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 58a69f830d29..f78ceba42e57 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, struct cstate *cs = lcs->next; unsigned long deltaS, deltaA; short changes = 0; - int hlen; + int nlen, hlen; unsigned char new_seq[16]; unsigned char *cp = new_seq; struct iphdr *ip; @@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, return isize; ip = (struct iphdr *) icp; + if (ip->version != 4 || ip->ihl < 5) + return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { @@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, comp->sls_o_tcp++; return isize; } - /* Extract TCP header */ + nlen = ip->ihl * 4; + if (isize < nlen + sizeof(*th)) + return isize; - th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4); - hlen = ip->ihl*4 + th->doff*4; + th = (struct tcphdr *)(icp + nlen); + if (th->doff < sizeof(struct tcphdr) / 4) + return isize; + hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 61d7e0d1d77d..8e56a41dd758 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -863,7 +863,10 @@ static int slip_open(struct tty_struct *tty) tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ca70a1d840eb..4004f98e50d9 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2240,6 +2240,8 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 9485c8d1de8a..5754bb6ca0ee 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -61,7 +61,6 @@ enum qmi_wwan_flags { enum qmi_wwan_quirks { QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ - QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. 
endpoints */ }; struct qmimux_hdr { @@ -338,6 +337,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } @@ -916,16 +918,6 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = { .data = QMI_WWAN_QUIRK_DTR, }; -static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { - .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, - .bind = qmi_wwan_bind, - .unbind = qmi_wwan_unbind, - .manage_power = qmi_wwan_manage_power, - .rx_fixup = qmi_wwan_rx_fixup, - .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG, -}; - #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ @@ -946,14 +938,18 @@ static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { #define QMI_GOBI_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 0) -/* Quectel does not use fixed interface numbers on at least some of their - * devices. We need to check the number of endpoints to ensure that we bind to - * the correct interface. +/* Many devices have QMI and DIAG functions which are distinguishable + * from other vendor specific functions by class, subclass and + * protocol all being 0xff. The DIAG function has exactly 2 endpoints + * and is silently rejected when probed. + * + * This makes it possible to match dynamically numbered QMI functions + * as seen on e.g. many Quectel modems. */ -#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \ +#define QMI_MATCH_FF_FF_FF(vend, prod) \ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \ USB_SUBCLASS_VENDOR_SPEC, 0xff), \ - .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr static const struct usb_device_id products[] = { /* 1. CDC ECM like devices match on the control interface */ @@ -1059,10 +1055,10 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. 
Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1363,6 +1359,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ @@ -1454,7 +1451,6 @@ static int qmi_wwan_probe(struct usb_interface *intf, { struct usb_device_id *id = (struct usb_device_id *)prod; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; - const struct driver_info *info; /* Workaround to enable dynamic IDs. This disables usbnet * blacklisting functionality. Which, if required, can be @@ -1490,12 +1486,8 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)id->driver_info; - - if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { - if (desc->bNumEndpoints == 2) - return -ENODEV; - } + if (desc->bNumEndpoints == 2) + return -ENODEV; return usbnet_probe(intf, id); } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3f425f974d03..e8e35c35fa96 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3220,6 +3220,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } return data; @@ -5401,7 +5403,10 @@ static void r8153_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -5538,7 +5543,10 @@ static void r8153b_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index aef7de225783..4ad0a0c33d85 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -245,6 +245,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ret = -ENOMEM; goto free_riptr; } + if (riptr != (u16)riptr || tiptr != (u16)tiptr) { + dev_err(priv->dev, "MURAM allocation out of addressable range\n"); + ret = -ENOMEM; + goto free_tiptr; + } /* Set RIPTR, TIPTR */ iowrite16be(riptr, &priv->ucc_pram->riptr); diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index 5643675ff724..bf78073ee7fd 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -62,11 +62,12 @@ static int x25_data_indication(struct net_device *dev, struct sk_buff *skb) { unsigned char *ptr; - skb_push(skb, 1); - if (skb_cow(skb, 1)) return NET_RX_DROP; + skb_push(skb, 1); + skb_reset_network_header(skb); + ptr = skb->data; *ptr = X25_IFACE_DATA; @@ -79,6 +80,13 @@ static int x25_data_indication(struct net_device *dev, struct sk_buff *skb) static void x25_data_transmit(struct net_device *dev, struct sk_buff *skb) { hdlc_device *hdlc = dev_to_hdlc(dev); + + skb_reset_network_header(skb); + skb->protocol = hdlc_type_trans(skb, 
dev); + + if (dev_nit_active(dev)) + dev_queue_xmit_nit(skb, dev); + hdlc->xmit(skb, dev); /* Ignore return value :-( */ } @@ -93,6 +101,7 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->data[0]) { case X25_IFACE_DATA: /* Data to be transmitted */ skb_pull(skb, 1); + skb_reset_network_header(skb); if ((result = lapb_data_request(dev, skb)) != LAPB_OK) dev_kfree_skb(skb); return NETDEV_TX_OK; diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index ea6ee6a608ce..e7619cec978a 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -258,7 +258,7 @@ struct port { struct hss_plat_info *plat; buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; struct desc *desc_tab; /* coherent */ - u32 desc_tab_phys; + dma_addr_t desc_tab_phys; unsigned int id; unsigned int clock_type, clock_rate, loopback; unsigned int initialized, carrier; @@ -858,7 +858,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_dropped++; return NETDEV_TX_OK; } - memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4); + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4); dev_kfree_skb(skb); #endif diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile new file mode 100644 index 000000000000..fc52b2cb500b --- /dev/null +++ b/drivers/net/wireguard/Makefile @@ -0,0 +1,18 @@ +ccflags-y := -O3 +ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG +wireguard-y := main.o +wireguard-y += noise.o +wireguard-y += device.o +wireguard-y += peer.o +wireguard-y += timers.o +wireguard-y += queueing.o +wireguard-y += send.o +wireguard-y += receive.o +wireguard-y += socket.o +wireguard-y += peerlookup.o +wireguard-y += allowedips.o +wireguard-y += ratelimiter.o +wireguard-y += cookie.o +wireguard-y += netlink.o +obj-$(CONFIG_WIREGUARD) := wireguard.o diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c new file mode 100644 index 000000000000..3725e9cd85f4 --- /dev/null +++ b/drivers/net/wireguard/allowedips.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#include "allowedips.h" +#include "peer.h" + +static void swap_endian(u8 *dst, const u8 *src, u8 bits) +{ + if (bits == 32) { + *(u32 *)dst = be32_to_cpu(*(const __be32 *)src); + } else if (bits == 128) { + ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]); + ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]); + } +} + +static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src, + u8 cidr, u8 bits) +{ + node->cidr = cidr; + node->bit_at_a = cidr / 8U; +#ifdef __LITTLE_ENDIAN + node->bit_at_a ^= (bits / 8U - 1U) % 8U; +#endif + node->bit_at_b = 7U - (cidr % 8U); + node->bitlen = bits; + memcpy(node->bits, src, bits / 8U); +} +#define CHOOSE_NODE(parent, key) \ + parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] + +static void push_rcu(struct allowedips_node **stack, + struct allowedips_node __rcu *p, unsigned int *len) +{ + if (rcu_access_pointer(p)) { + WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); + stack[(*len)++] = rcu_dereference_raw(p); + } +} + +static void root_free_rcu(struct rcu_head *rcu) +{ + struct allowedips_node *node, *stack[128] = { + container_of(rcu, struct allowedips_node, rcu) }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + kfree(node); + } +} + +static void root_remove_peer_lists(struct allowedips_node *root) +{ + struct allowedips_node *node, *stack[128] = { root }; + unsigned int len = 1; + + while (len > 0 && (node = stack[--len])) { + push_rcu(stack, node->bit[0], &len); + push_rcu(stack, node->bit[1], &len); + if (rcu_access_pointer(node->peer)) + list_del(&node->peer_list); + } +} + +static void walk_remove_by_peer(struct allowedips_node __rcu **top, + struct wg_peer *peer, struct mutex *lock) +{ +#define REF(p) rcu_access_pointer(p) +#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) +#define PUSH(p) ({ \ + WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \ + stack[len++] = p; \ + }) + + struct allowedips_node __rcu **stack[128], **nptr; + struct allowedips_node *node, *prev; + unsigned int len; + + if (unlikely(!peer || !REF(*top))) + return; + + for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { + nptr = stack[len - 1]; + node = DEREF(nptr); + if (!node) { + --len; + continue; + } + if (!prev || REF(prev->bit[0]) == node || + REF(prev->bit[1]) == node) { + if (REF(node->bit[0])) + PUSH(&node->bit[0]); + else if (REF(node->bit[1])) + PUSH(&node->bit[1]); + } else if (REF(node->bit[0]) == prev) { + if (REF(node->bit[1])) + PUSH(&node->bit[1]); + } else { + if (rcu_dereference_protected(node->peer, + lockdep_is_held(lock)) == peer) { + RCU_INIT_POINTER(node->peer, NULL); + list_del_init(&node->peer_list); + if (!node->bit[0] || !node->bit[1]) { + rcu_assign_pointer(*nptr, DEREF( + &node->bit[!REF(node->bit[0])])); + kfree_rcu(node, rcu); + node = DEREF(nptr); + } + } + --len; + } + } + +#undef REF +#undef DEREF +#undef PUSH +} + +static unsigned int fls128(u64 a, u64 b) +{ + return a ? 
fls64(a) + 64U : fls64(b); +} + +static u8 common_bits(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + if (bits == 32) + return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key); + else if (bits == 128) + return 128U - fls128( + *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0], + *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]); + return 0; +} + +static bool prefix_matches(const struct allowedips_node *node, const u8 *key, + u8 bits) +{ + /* This could be much faster if it actually just compared the common + * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and + * the rest, but it turns out that common_bits is already super fast on + * modern processors, even taking into account the unfortunate bswap. + * So, we just inline it like this instead. + */ + return common_bits(node, key, bits) >= node->cidr; +} + +static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits, + const u8 *key) +{ + struct allowedips_node *node = trie, *found = NULL; + + while (node && prefix_matches(node, key, bits)) { + if (rcu_access_pointer(node->peer)) + found = node; + if (node->cidr == bits) + break; + node = rcu_dereference_bh(CHOOSE_NODE(node, key)); + } + return found; +} + +/* Returns a strong reference to a peer */ +static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits, + const void *be_ip) +{ + /* Aligned so it can be passed to fls/fls64 */ + u8 ip[16] __aligned(__alignof(u64)); + struct allowedips_node *node; + struct wg_peer *peer = NULL; + + swap_endian(ip, be_ip, bits); + + rcu_read_lock_bh(); +retry: + node = find_node(rcu_dereference_bh(root), bits, ip); + if (node) { + peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer)); + if (!peer) + goto retry; + } + rcu_read_unlock_bh(); + return peer; +} + +static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, + u8 cidr, u8 bits, struct allowedips_node **rnode, + struct mutex *lock) +{ + struct allowedips_node *node = rcu_dereference_protected(trie, + lockdep_is_held(lock)); + struct allowedips_node *parent = NULL; + bool exact = false; + + while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) { + parent = node; + if (parent->cidr == cidr) { + exact = true; + break; + } + node = rcu_dereference_protected(CHOOSE_NODE(parent, key), + lockdep_is_held(lock)); + } + *rnode = parent; + return exact; +} + +static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + struct allowedips_node *node, *parent, *down, *newnode; + + if (unlikely(cidr > bits || !peer)) + return -EINVAL; + + if (!rcu_access_pointer(*trie)) { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + RCU_INIT_POINTER(node->peer, peer); + list_add_tail(&node->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(node, key, cidr, bits); + rcu_assign_pointer(*trie, node); + return 0; + } + if (node_placement(*trie, key, cidr, bits, &node, lock)) { + rcu_assign_pointer(node->peer, peer); + list_move_tail(&node->peer_list, &peer->allowedips_list); + return 0; + } + + newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); + if (unlikely(!newnode)) + return -ENOMEM; + RCU_INIT_POINTER(newnode->peer, peer); + list_add_tail(&newnode->peer_list, &peer->allowedips_list); + copy_and_assign_cidr(newnode, key, cidr, bits); + + if (!node) { + down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); + } else { + down = rcu_dereference_protected(CHOOSE_NODE(node, 
key), + lockdep_is_held(lock)); + if (!down) { + rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); + return 0; + } + } + cidr = min(cidr, common_bits(down, key, bits)); + parent = node; + + if (newnode->cidr == cidr) { + rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); + if (!parent) + rcu_assign_pointer(*trie, newnode); + else + rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), + newnode); + } else { + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) { + list_del(&newnode->peer_list); + kfree(newnode); + return -ENOMEM; + } + INIT_LIST_HEAD(&node->peer_list); + copy_and_assign_cidr(node, newnode->bits, cidr, bits); + + rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); + rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); + if (!parent) + rcu_assign_pointer(*trie, node); + else + rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), + node); + } + return 0; +} + +void wg_allowedips_init(struct allowedips *table) +{ + table->root4 = table->root6 = NULL; + table->seq = 1; +} + +void wg_allowedips_free(struct allowedips *table, struct mutex *lock) +{ + struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6; + + ++table->seq; + RCU_INIT_POINTER(table->root4, NULL); + RCU_INIT_POINTER(table->root6, NULL); + if (rcu_access_pointer(old4)) { + struct allowedips_node *node = rcu_dereference_protected(old4, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } + if (rcu_access_pointer(old6)) { + struct allowedips_node *node = rcu_dereference_protected(old6, + lockdep_is_held(lock)); + + root_remove_peer_lists(node); + call_rcu(&node->rcu, root_free_rcu); + } +} + +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls */ + u8 key[4] __aligned(__alignof(u32)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 32); + return add(&table->root4, 32, key, cidr, peer, lock); +} + +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock) +{ + /* Aligned so it can be passed to fls64 */ + u8 key[16] __aligned(__alignof(u64)); + + ++table->seq; + swap_endian(key, (const u8 *)ip, 128); + return add(&table->root6, 128, key, cidr, peer, lock); +} + +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock) +{ + ++table->seq; + walk_remove_by_peer(&table->root4, peer, lock); + walk_remove_by_peer(&table->root6, peer, lock); +} + +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) +{ + const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U); + swap_endian(ip, node->bits, node->bitlen); + memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes); + if (node->cidr) + ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U); + + *cidr = node->cidr; + return node->bitlen == 32 ? 
AF_INET : AF_INET6; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr); + return NULL; +} + +/* Returns a strong reference to a peer */ +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return lookup(table->root4, 32, &ip_hdr(skb)->saddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr); + return NULL; +} + +#include "selftest/allowedips.c" diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h new file mode 100644 index 000000000000..e5c83cafcef4 --- /dev/null +++ b/drivers/net/wireguard/allowedips.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_ALLOWEDIPS_H +#define _WG_ALLOWEDIPS_H + +#include +#include +#include + +struct wg_peer; + +struct allowedips_node { + struct wg_peer __rcu *peer; + struct allowedips_node __rcu *bit[2]; + /* While it may seem scandalous that we waste space for v4, + * we're alloc'ing to the nearest power of 2 anyway, so this + * doesn't actually make a difference. + */ + u8 bits[16] __aligned(__alignof(u64)); + u8 cidr, bit_at_a, bit_at_b, bitlen; + + /* Keep rarely used list at bottom to be beyond cache line. */ + union { + struct list_head peer_list; + struct rcu_head rcu; + }; +}; + +struct allowedips { + struct allowedips_node __rcu *root4; + struct allowedips_node __rcu *root6; + u64 seq; +}; + +void wg_allowedips_init(struct allowedips *table); +void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); +int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip, + u8 cidr, struct wg_peer *peer, struct mutex *lock); +void wg_allowedips_remove_by_peer(struct allowedips *table, + struct wg_peer *peer, struct mutex *lock); +/* The ip input pointer should be __aligned(__alignof(u64))) */ +int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr); + +/* These return a strong reference to a peer: */ +struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table, + struct sk_buff *skb); +struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table, + struct sk_buff *skb); + +#ifdef DEBUG +bool wg_allowedips_selftest(void); +#endif + +#endif /* _WG_ALLOWEDIPS_H */ diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c new file mode 100644 index 000000000000..4956f0499c19 --- /dev/null +++ b/drivers/net/wireguard/cookie.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#include "cookie.h" +#include "peer.h" +#include "device.h" +#include "messages.h" +#include "ratelimiter.h" +#include "timers.h" + +#include +#include + +#include +#include + +void wg_cookie_checker_init(struct cookie_checker *checker, + struct wg_device *wg) +{ + init_rwsem(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + checker->device = wg; +} + +enum { COOKIE_KEY_LABEL_LEN = 8 }; +static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; +static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; + +static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 pubkey[NOISE_PUBLIC_KEY_LEN], + const u8 label[COOKIE_KEY_LABEL_LEN]) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); + blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN); + blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN); + blake2s_final(&blake, key); +} + +/* Must hold peer->handshake.static_identity->lock */ +void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker) +{ + if (likely(checker->device->static_identity.has_identity)) { + precompute_key(checker->cookie_encryption_key, + checker->device->static_identity.static_public, + cookie_key_label); + precompute_key(checker->message_mac1_key, + checker->device->static_identity.static_public, + mac1_key_label); + } else { + memset(checker->cookie_encryption_key, 0, + NOISE_SYMMETRIC_KEY_LEN); + memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN); + } +} + +void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer) +{ + precompute_key(peer->latest_cookie.cookie_decryption_key, + peer->handshake.remote_static, cookie_key_label); + precompute_key(peer->latest_cookie.message_mac1_key, + peer->handshake.remote_static, mac1_key_label); +} + +void wg_cookie_init(struct cookie *cookie) +{ + memset(cookie, 0, sizeof(*cookie)); + init_rwsem(&cookie->lock); +} + +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, + const u8 key[NOISE_SYMMETRIC_KEY_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac1); + blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN); +} + +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, + const u8 cookie[COOKIE_LEN]) +{ + len = len - sizeof(struct message_macs) + + offsetof(struct message_macs, mac2); + blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); +} + +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, + struct cookie_checker *checker) +{ + struct blake2s_state state; + + if (wg_birthdate_has_expired(checker->secret_birthdate, + COOKIE_SECRET_MAX_AGE)) { + down_write(&checker->secret_lock); + checker->secret_birthdate = ktime_get_coarse_boottime_ns(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + up_write(&checker->secret_lock); + } + + down_read(&checker->secret_lock); + + blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN); + if (skb->protocol == htons(ETH_P_IP)) + blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, + sizeof(struct in_addr)); + else if (skb->protocol == htons(ETH_P_IPV6)) + blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); + blake2s_final(&state, cookie); + + up_read(&checker->secret_lock); +} + +enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker, 
+ struct sk_buff *skb, + bool check_cookie) +{ + struct message_macs *macs = (struct message_macs *) + (skb->data + skb->len - sizeof(*macs)); + enum cookie_mac_state ret; + u8 computed_mac[COOKIE_LEN]; + u8 cookie[COOKIE_LEN]; + + ret = INVALID_MAC; + compute_mac1(computed_mac, skb->data, skb->len, + checker->message_mac1_key); + if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_BUT_NO_COOKIE; + + if (!check_cookie) + goto out; + + make_cookie(cookie, skb, checker); + + compute_mac2(computed_mac, skb->data, skb->len, cookie); + if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; + if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev))) + goto out; + + ret = VALID_MAC_WITH_COOKIE; + +out: + return ret; +} + +void wg_cookie_add_mac_to_packet(void *message, size_t len, + struct wg_peer *peer) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)message + len - sizeof(*macs)); + + down_write(&peer->latest_cookie.lock); + compute_mac1(macs->mac1, message, len, + peer->latest_cookie.message_mac1_key); + memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); + peer->latest_cookie.have_sent_mac1 = true; + up_write(&peer->latest_cookie.lock); + + down_read(&peer->latest_cookie.lock); + if (peer->latest_cookie.is_valid && + !wg_birthdate_has_expired(peer->latest_cookie.birthdate, + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) + compute_mac2(macs->mac2, message, len, + peer->latest_cookie.cookie); + else + memset(macs->mac2, 0, COOKIE_LEN); + up_read(&peer->latest_cookie.lock); +} + +void wg_cookie_message_create(struct message_handshake_cookie *dst, + struct sk_buff *skb, __le32 index, + struct cookie_checker *checker) +{ + struct message_macs *macs = (struct message_macs *) + ((u8 *)skb->data + skb->len - sizeof(*macs)); + u8 cookie[COOKIE_LEN]; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE); + dst->receiver_index = index; + get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN); + + make_cookie(cookie, skb, checker); + xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, + macs->mac1, COOKIE_LEN, dst->nonce, + checker->cookie_encryption_key); +} + +void wg_cookie_message_consume(struct message_handshake_cookie *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL; + u8 cookie[COOKIE_LEN]; + bool ret; + + if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable, + INDEX_HASHTABLE_HANDSHAKE | + INDEX_HASHTABLE_KEYPAIR, + src->receiver_index, &peer))) + return; + + down_read(&peer->latest_cookie.lock); + if (unlikely(!peer->latest_cookie.have_sent_mac1)) { + up_read(&peer->latest_cookie.lock); + goto out; + } + ret = xchacha20poly1305_decrypt( + cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), + peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, + peer->latest_cookie.cookie_decryption_key); + up_read(&peer->latest_cookie.lock); + + if (ret) { + down_write(&peer->latest_cookie.lock); + memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN); + peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns(); + peer->latest_cookie.is_valid = true; + peer->latest_cookie.have_sent_mac1 = false; + up_write(&peer->latest_cookie.lock); + } else { + net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", + wg->dev->name); + } + +out: + wg_peer_put(peer); +} diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h new file mode 100644 index 000000000000..c4bd61ca03f2 --- 
/dev/null
+++ b/drivers/net/wireguard/cookie.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_COOKIE_H
+#define _WG_COOKIE_H
+
+#include "messages.h"
+#include <linux/rwsem.h>
+
+struct wg_peer;
+
+struct cookie_checker {
+ u8 secret[NOISE_HASH_LEN];
+ u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+ u64 secret_birthdate;
+ struct rw_semaphore secret_lock;
+ struct wg_device *device;
+};
+
+struct cookie {
+ u64 birthdate;
+ bool is_valid;
+ u8 cookie[COOKIE_LEN];
+ bool have_sent_mac1;
+ u8 last_mac1_sent[COOKIE_LEN];
+ u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+ struct rw_semaphore lock;
+};
+
+enum cookie_mac_state {
+ INVALID_MAC,
+ VALID_MAC_BUT_NO_COOKIE,
+ VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
+ VALID_MAC_WITH_COOKIE
+};
+
+void wg_cookie_checker_init(struct cookie_checker *checker,
+ struct wg_device *wg);
+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
+void wg_cookie_init(struct cookie *cookie);
+
+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
+ struct sk_buff *skb,
+ bool check_cookie);
+void wg_cookie_add_mac_to_packet(void *message, size_t len,
+ struct wg_peer *peer);
+
+void wg_cookie_message_create(struct message_handshake_cookie *src,
+ struct sk_buff *skb, __le32 index,
+ struct cookie_checker *checker);
+void wg_cookie_message_consume(struct message_handshake_cookie *src,
+ struct wg_device *wg);
+
+#endif /* _WG_COOKIE_H */
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
new file mode 100644
index 000000000000..cdc96968b0f4
--- /dev/null
+++ b/drivers/net/wireguard/device.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "socket.h"
+#include "timers.h"
+#include "device.h"
+#include "ratelimiter.h"
+#include "peer.h"
+#include "messages.h"
+
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <linux/suspend.h>
+#include <net/icmp.h>
+#include <net/rtnetlink.h>
+#include <net/ip_tunnels.h>
+#include <net/addrconf.h>
+
+static LIST_HEAD(device_list);
+
+static int wg_open(struct net_device *dev)
+{
+ struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
+ struct inet6_dev *dev_v6 = __in6_dev_get(dev);
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
+ int ret;
+
+ if (dev_v4) {
+ /* At some point we might put this check near the ip_rt_send_
+ * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
+ * to the current secpath check.
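+ *
+ * [Editor's note, not part of the original patch: the reason for
+ * clearing SEND_REDIRECTS just below is that a tunnel interface
+ * routinely forwards traffic back toward the host it arrived
+ * from, which would otherwise tempt the IPv4 stack into emitting
+ * spurious ICMP redirects.]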
+ */ + IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false); + IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false; + } + if (dev_v6) + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE; + + ret = wg_socket_init(wg, wg->incoming_port); + if (ret < 0) + return ret; + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_send_staged_packets(peer); + if (peer->persistent_keepalive_interval) + wg_packet_send_keepalive(peer); + } + mutex_unlock(&wg->device_update_lock); + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int wg_pm_notification(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct wg_device *wg; + struct wg_peer *peer; + + /* If the machine is constantly suspending and resuming, as part of + * its normal operation rather than as a somewhat rare event, then we + * don't actually want to clear keys. + */ + if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID)) + return 0; + + if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE) + return 0; + + rtnl_lock(); + list_for_each_entry(wg, &device_list, device_list) { + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + del_timer(&peer->timer_zero_key_material); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + } + mutex_unlock(&wg->device_update_lock); + } + rtnl_unlock(); + rcu_barrier(); + return 0; +} + +static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification }; +#endif + +static int wg_stop(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct wg_peer *peer; + + mutex_lock(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) { + wg_packet_purge_staged_packets(peer); + wg_timers_stop(peer); + wg_noise_handshake_clear(&peer->handshake); + wg_noise_keypairs_clear(&peer->keypairs); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + } + mutex_unlock(&wg->device_update_lock); + skb_queue_purge(&wg->incoming_handshakes); + wg_socket_reinit(wg, NULL, NULL); + return 0; +} + +static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + struct sk_buff_head packets; + struct wg_peer *peer; + struct sk_buff *next; + sa_family_t family; + u32 mtu; + int ret; + + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { + ret = -EPROTONOSUPPORT; + net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); + goto err; + } + + peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb); + if (unlikely(!peer)) { + ret = -ENOKEY; + if (skb->protocol == htons(ETH_P_IP)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n", + dev->name, &ip_hdr(skb)->daddr); + else if (skb->protocol == htons(ETH_P_IPV6)) + net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", + dev->name, &ipv6_hdr(skb)->daddr); + goto err; + } + + family = READ_ONCE(peer->endpoint.addr.sa_family); + if (unlikely(family != AF_INET && family != AF_INET6)) { + ret = -EDESTADDRREQ; + net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n", + dev->name, peer->internal_id); + goto err_peer; + } + + mtu = skb_dst(skb) ? 
dst_mtu(skb_dst(skb)) : dev->mtu; + + __skb_queue_head_init(&packets); + if (!skb_is_gso(skb)) { + skb_mark_not_on_list(skb); + } else { + struct sk_buff *segs = skb_gso_segment(skb, 0); + + if (unlikely(IS_ERR(segs))) { + ret = PTR_ERR(segs); + goto err_peer; + } + dev_kfree_skb(skb); + skb = segs; + } + + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + continue; + + /* We only need to keep the original dst around for icmp, + * so at this point we're in a position to drop it. + */ + skb_dst_drop(skb); + + PACKET_CB(skb)->mtu = mtu; + + __skb_queue_tail(&packets, skb); + } + + spin_lock_bh(&peer->staged_packet_queue.lock); + /* If the queue is getting too big, we start removing the oldest packets + * until it's small again. We do this before adding the new packet, so + * we don't remove GSO segments that are in excess. + */ + while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) { + dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue)); + ++dev->stats.tx_dropped; + } + skb_queue_splice_tail(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + wg_packet_send_staged_packets(peer); + + wg_peer_put(peer); + return NETDEV_TX_OK; + +err_peer: + wg_peer_put(peer); +err: + ++dev->stats.tx_errors; + if (skb->protocol == htons(ETH_P_IP)) + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + else if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + kfree_skb(skb); + return ret; +} + +static const struct net_device_ops netdev_ops = { + .ndo_open = wg_open, + .ndo_stop = wg_stop, + .ndo_start_xmit = wg_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64 +}; + +static void wg_destruct(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + + rtnl_lock(); + list_del(&wg->device_list); + rtnl_unlock(); + mutex_lock(&wg->device_update_lock); + wg->incoming_port = 0; + wg_socket_reinit(wg, NULL, NULL); + /* The final references are cleared in the below calls to destroy_workqueue. */ + wg_peer_remove_all(wg); + destroy_workqueue(wg->handshake_receive_wq); + destroy_workqueue(wg->handshake_send_wq); + destroy_workqueue(wg->packet_crypt_wq); + wg_packet_queue_free(&wg->decrypt_queue, true); + wg_packet_queue_free(&wg->encrypt_queue, true); + rcu_barrier(); /* Wait for all the peers to be actually freed. 
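+ *
+ * [Editor's note: peers are released through kref/RCU, so the
+ * wg_peer_remove_all() above only schedules the final frees;
+ * rcu_barrier() waits for those pending RCU callbacks to run
+ * before the rest of the teardown proceeds.]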
*/ + wg_ratelimiter_uninit(); + memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); + skb_queue_purge(&wg->incoming_handshakes); + free_percpu(dev->tstats); + free_percpu(wg->incoming_handshakes_worker); + if (wg->have_creating_net_ref) + put_net(wg->creating_net); + kvfree(wg->index_hashtable); + kvfree(wg->peer_hashtable); + mutex_unlock(&wg->device_update_lock); + + pr_debug("%s: Interface deleted\n", dev->name); + free_netdev(dev); +} + +static const struct device_type device_type = { .name = KBUILD_MODNAME }; + +static void wg_setup(struct net_device *dev) +{ + struct wg_device *wg = netdev_priv(dev); + enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_SG | NETIF_F_GSO | + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA }; + const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) + + max(sizeof(struct ipv6hdr), sizeof(struct iphdr)); + + dev->netdev_ops = &netdev_ops; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->needed_headroom = DATA_PACKET_HEAD_ROOM; + dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE); + dev->type = ARPHRD_NONE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + dev->features |= NETIF_F_LLTX; + dev->features |= WG_NETDEV_FEATURES; + dev->hw_features |= WG_NETDEV_FEATURES; + dev->hw_enc_features |= WG_NETDEV_FEATURES; + dev->mtu = ETH_DATA_LEN - overhead; + dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead; + + SET_NETDEV_DEVTYPE(dev, &device_type); + + /* We need to keep the dst around in case of icmp replies. */ + netif_keep_dst(dev); + + memset(wg, 0, sizeof(*wg)); + wg->dev = dev; +} + +static int wg_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct wg_device *wg = netdev_priv(dev); + int ret = -ENOMEM; + + wg->creating_net = src_net; + init_rwsem(&wg->static_identity.lock); + mutex_init(&wg->socket_update_lock); + mutex_init(&wg->device_update_lock); + skb_queue_head_init(&wg->incoming_handshakes); + wg_allowedips_init(&wg->peer_allowedips); + wg_cookie_checker_init(&wg->cookie_checker, wg); + INIT_LIST_HEAD(&wg->peer_list); + wg->device_update_gen = 1; + + wg->peer_hashtable = wg_pubkey_hashtable_alloc(); + if (!wg->peer_hashtable) + return ret; + + wg->index_hashtable = wg_index_hashtable_alloc(); + if (!wg->index_hashtable) + goto err_free_peer_hashtable; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + goto err_free_index_hashtable; + + wg->incoming_handshakes_worker = + wg_packet_percpu_multicore_worker_alloc( + wg_packet_handshake_receive_worker, wg); + if (!wg->incoming_handshakes_worker) + goto err_free_tstats; + + wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", + WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_receive_wq) + goto err_free_incoming_handshakes; + + wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", + WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); + if (!wg->handshake_send_wq) + goto err_destroy_handshake_receive; + + wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s", + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name); + if (!wg->packet_crypt_wq) + goto err_destroy_handshake_send; + + ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, + true, MAX_QUEUED_PACKETS); + if (ret < 0) + goto err_destroy_packet_crypt; + + ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, + true, MAX_QUEUED_PACKETS); + if (ret < 0) + goto 
err_free_encrypt_queue;
+
+ ret = wg_ratelimiter_init();
+ if (ret < 0)
+ goto err_free_decrypt_queue;
+
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ goto err_uninit_ratelimiter;
+
+ list_add(&wg->device_list, &device_list);
+
+ /* We wait until the end to assign priv_destructor, so that
+ * register_netdevice doesn't call it for us if it fails.
+ */
+ dev->priv_destructor = wg_destruct;
+
+ pr_debug("%s: Interface created\n", dev->name);
+ return ret;
+
+err_uninit_ratelimiter:
+ wg_ratelimiter_uninit();
+err_free_decrypt_queue:
+ wg_packet_queue_free(&wg->decrypt_queue, true);
+err_free_encrypt_queue:
+ wg_packet_queue_free(&wg->encrypt_queue, true);
+err_destroy_packet_crypt:
+ destroy_workqueue(wg->packet_crypt_wq);
+err_destroy_handshake_send:
+ destroy_workqueue(wg->handshake_send_wq);
+err_destroy_handshake_receive:
+ destroy_workqueue(wg->handshake_receive_wq);
+err_free_incoming_handshakes:
+ free_percpu(wg->incoming_handshakes_worker);
+err_free_tstats:
+ free_percpu(dev->tstats);
+err_free_index_hashtable:
+ kvfree(wg->index_hashtable);
+err_free_peer_hashtable:
+ kvfree(wg->peer_hashtable);
+ return ret;
+}
+
+static struct rtnl_link_ops link_ops __read_mostly = {
+ .kind = KBUILD_MODNAME,
+ .priv_size = sizeof(struct wg_device),
+ .setup = wg_setup,
+ .newlink = wg_newlink,
+};
+
+static int wg_netdevice_notification(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
+ struct wg_device *wg = netdev_priv(dev);
+
+ ASSERT_RTNL();
+
+ if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
+ return 0;
+
+ if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
+ put_net(wg->creating_net);
+ wg->have_creating_net_ref = false;
+ } else if (dev_net(dev) != wg->creating_net &&
+ !wg->have_creating_net_ref) {
+ wg->have_creating_net_ref = true;
+ get_net(wg->creating_net);
+ }
+ return 0;
+}
+
+static struct notifier_block netdevice_notifier = {
+ .notifier_call = wg_netdevice_notification
+};
+
+int __init wg_device_init(void)
+{
+ int ret;
+
+#ifdef CONFIG_PM_SLEEP
+ ret = register_pm_notifier(&pm_notifier);
+ if (ret)
+ return ret;
+#endif
+
+ ret = register_netdevice_notifier(&netdevice_notifier);
+ if (ret)
+ goto error_pm;
+
+ ret = rtnl_link_register(&link_ops);
+ if (ret)
+ goto error_netdevice;
+
+ return 0;
+
+error_netdevice:
+ unregister_netdevice_notifier(&netdevice_notifier);
+error_pm:
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&pm_notifier);
+#endif
+ return ret;
+}
+
+void wg_device_uninit(void)
+{
+ rtnl_link_unregister(&link_ops);
+ unregister_netdevice_notifier(&netdevice_notifier);
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&pm_notifier);
+#endif
+ rcu_barrier();
+}
diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
new file mode 100644
index 000000000000..b15a8be9d816
--- /dev/null
+++ b/drivers/net/wireguard/device.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
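+ *
+ * [Editor's note: of the declarations below, struct crypt_queue is
+ * worth a close look; its union lets one type back both the
+ * multi-core encrypt/decrypt queues (per-CPU workers plus a
+ * last_cpu cursor for round-robin dispatch) and simpler queues
+ * that need only a single work_struct.]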
+ */
+
+#ifndef _WG_DEVICE_H
+#define _WG_DEVICE_H
+
+#include "noise.h"
+#include "allowedips.h"
+#include "peerlookup.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/ptr_ring.h>
+
+struct wg_device;
+
+struct multicore_worker {
+ void *ptr;
+ struct work_struct work;
+};
+
+struct crypt_queue {
+ struct ptr_ring ring;
+ union {
+ struct {
+ struct multicore_worker __percpu *worker;
+ int last_cpu;
+ };
+ struct work_struct work;
+ };
+};
+
+struct wg_device {
+ struct net_device *dev;
+ struct crypt_queue encrypt_queue, decrypt_queue;
+ struct sock __rcu *sock4, *sock6;
+ struct net *creating_net;
+ struct noise_static_identity static_identity;
+ struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
+ struct workqueue_struct *packet_crypt_wq;
+ struct sk_buff_head incoming_handshakes;
+ int incoming_handshake_cpu;
+ struct multicore_worker __percpu *incoming_handshakes_worker;
+ struct cookie_checker cookie_checker;
+ struct pubkey_hashtable *peer_hashtable;
+ struct index_hashtable *index_hashtable;
+ struct allowedips peer_allowedips;
+ struct mutex device_update_lock, socket_update_lock;
+ struct list_head device_list, peer_list;
+ unsigned int num_peers, device_update_gen;
+ u32 fwmark;
+ u16 incoming_port;
+ bool have_creating_net_ref;
+};
+
+int wg_device_init(void);
+void wg_device_uninit(void);
+
+#endif /* _WG_DEVICE_H */
diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
new file mode 100644
index 000000000000..7a7d5f1a80fc
--- /dev/null
+++ b/drivers/net/wireguard/main.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include "version.h"
+#include "device.h"
+#include "noise.h"
+#include "queueing.h"
+#include "ratelimiter.h"
+#include "netlink.h"
+
+#include <uapi/linux/wireguard.h>
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/genetlink.h>
+#include <net/rtnetlink.h>
+
+static int __init mod_init(void)
+{
+ int ret;
+
+#ifdef DEBUG
+ if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
+ !wg_ratelimiter_selftest())
+ return -ENOTRECOVERABLE;
+#endif
+ wg_noise_init();
+
+ ret = wg_device_init();
+ if (ret < 0)
+ goto err_device;
+
+ ret = wg_genetlink_init();
+ if (ret < 0)
+ goto err_netlink;
+
+ pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
+ pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");
+
+ return 0;
+
+err_netlink:
+ wg_device_uninit();
+err_device:
+ return ret;
+}
+
+static void __exit mod_exit(void)
+{
+ wg_genetlink_uninit();
+ wg_device_uninit();
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("WireGuard secure network tunnel");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+MODULE_VERSION(WIREGUARD_VERSION);
+MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
+MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h
new file mode 100644
index 000000000000..b8a7b9ce32ba
--- /dev/null
+++ b/drivers/net/wireguard/messages.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
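+ *
+ * [Editor's note, a worked example of how the constants below fit
+ * together:
+ *
+ *   MESSAGE_MINIMUM_LENGTH = message_data_len(0)
+ *                          = sizeof(struct message_data) + NOISE_AUTHTAG_LEN
+ *                          = (4 + 4 + 8) + 16 = 32 bytes,
+ *
+ * so the per-packet overhead computed by wg_setup() in device.c is
+ * 32 + sizeof(struct udphdr) + sizeof(struct ipv6hdr) = 32 + 8 + 40 = 80,
+ * giving the familiar default MTU of 1500 - 80 = 1420.]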
+ */
+
+#ifndef _WG_MESSAGES_H
+#define _WG_MESSAGES_H
+
+#include <crypto/curve25519.h>
+#include <crypto/chacha20poly1305.h>
+#include <crypto/blake2s.h>
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/skbuff.h>
+
+enum noise_lengths {
+ NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
+ NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
+ NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
+ NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
+ NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
+};
+
+#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
+
+enum cookie_values {
+ COOKIE_SECRET_MAX_AGE = 2 * 60,
+ COOKIE_SECRET_LATENCY = 5,
+ COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
+ COOKIE_LEN = 16
+};
+
+enum counter_values {
+ COUNTER_BITS_TOTAL = 2048,
+ COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
+ COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
+};
+
+enum limits {
+ REKEY_AFTER_MESSAGES = 1ULL << 60,
+ REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
+ REKEY_TIMEOUT = 5,
+ REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
+ REKEY_AFTER_TIME = 120,
+ REJECT_AFTER_TIME = 180,
+ INITIATIONS_PER_SECOND = 50,
+ MAX_PEERS_PER_DEVICE = 1U << 20,
+ KEEPALIVE_TIMEOUT = 10,
+ MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
+ MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
+ MAX_STAGED_PACKETS = 128,
+ MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
+};
+
+enum message_type {
+ MESSAGE_INVALID = 0,
+ MESSAGE_HANDSHAKE_INITIATION = 1,
+ MESSAGE_HANDSHAKE_RESPONSE = 2,
+ MESSAGE_HANDSHAKE_COOKIE = 3,
+ MESSAGE_DATA = 4
+};
+
+struct message_header {
+ /* The actual layout of this that we want is:
+ * u8 type
+ * u8 reserved_zero[3]
+ *
+ * But it turns out that by encoding this as little endian,
+ * we achieve the same thing, and it makes checking faster.
+ */
+ __le32 type;
+};
+
+struct message_macs {
+ u8 mac1[COOKIE_LEN];
+ u8 mac2[COOKIE_LEN];
+};
+
+struct message_handshake_initiation {
+ struct message_header header;
+ __le32 sender_index;
+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
+ u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
+ struct message_macs macs;
+};
+
+struct message_handshake_response {
+ struct message_header header;
+ __le32 sender_index;
+ __le32 receiver_index;
+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 encrypted_nothing[noise_encrypted_len(0)];
+ struct message_macs macs;
+};
+
+struct message_handshake_cookie {
+ struct message_header header;
+ __le32 receiver_index;
+ u8 nonce[COOKIE_NONCE_LEN];
+ u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
+};
+
+struct message_data {
+ struct message_header header;
+ __le32 key_idx;
+ __le64 counter;
+ u8 encrypted_data[];
+};
+
+#define message_data_len(plain_len) \
+ (noise_encrypted_len(plain_len) + sizeof(struct message_data))
+
+enum message_alignments {
+ MESSAGE_PADDING_MULTIPLE = 16,
+ MESSAGE_MINIMUM_LENGTH = message_data_len(0)
+};
+
+#define SKB_HEADER_LEN \
+ (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
+ sizeof(struct udphdr) + NET_SKB_PAD)
+#define DATA_PACKET_HEAD_ROOM \
+ ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
+
+enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };
+
+#endif /* _WG_MESSAGES_H */
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
new file mode 100644
index 000000000000..bda26405497c
--- /dev/null
+++ b/drivers/net/wireguard/netlink.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
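+ *
+ * [Editor's note: the two generic-netlink commands implemented here
+ * are the kernel side of the userspace configuration API; as an
+ * illustration, wg(8)'s "wg show wg0" is served by WG_CMD_GET_DEVICE
+ * dumps and "wg set wg0 listen-port 51820 ..." by WG_CMD_SET_DEVICE.]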
+ */
+
+#include "netlink.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+
+#include <uapi/linux/wireguard.h>
+
+#include <linux/if.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include <crypto/algapi.h>
+
+static struct genl_family genl_family;
+
+static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
+ [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
+ [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
+ [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
+ [WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
+ [WGDEVICE_A_PEERS] = { .type = NLA_NESTED }
+};
+
+static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
+ [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN },
+ [WGPEER_A_FLAGS] = { .type = NLA_U32 },
+ [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) },
+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
+ [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) },
+ [WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
+ [WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
+ [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
+ [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 }
+};
+
+static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
+ [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
+ [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) },
+ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
+};
+
+static struct wg_device *lookup_interface(struct nlattr **attrs,
+ struct sk_buff *skb)
+{
+ struct net_device *dev = NULL;
+
+ if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
+ return ERR_PTR(-EBADR);
+ if (attrs[WGDEVICE_A_IFINDEX])
+ dev = dev_get_by_index(sock_net(skb->sk),
+ nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
+ else if (attrs[WGDEVICE_A_IFNAME])
+ dev = dev_get_by_name(sock_net(skb->sk),
+ nla_data(attrs[WGDEVICE_A_IFNAME]));
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+ if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind ||
+ strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
+ dev_put(dev);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ return netdev_priv(dev);
+}
+
+static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
+ int family)
+{
+ struct nlattr *allowedip_nest;
+
+ allowedip_nest = nla_nest_start(skb, 0);
+ if (!allowedip_nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) ||
+ nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) ||
+ nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ?
+ sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) { + nla_nest_cancel(skb, allowedip_nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, allowedip_nest); + return 0; +} + +struct dump_ctx { + struct wg_device *wg; + struct wg_peer *next_peer; + u64 allowedips_seq; + struct allowedips_node *next_allowedip; +}; + +#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) + +static int +get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) +{ + + struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); + struct allowedips_node *allowedips_node = ctx->next_allowedip; + bool fail; + + if (!peer_nest) + return -EMSGSIZE; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, + peer->handshake.remote_static); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (!allowedips_node) { + const struct __kernel_timespec last_handshake = { + .tv_sec = peer->walltime_last_handshake.tv_sec, + .tv_nsec = peer->walltime_last_handshake.tv_nsec + }; + + down_read(&peer->handshake.lock); + fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, + NOISE_SYMMETRIC_KEY_LEN, + peer->handshake.preshared_key); + up_read(&peer->handshake.lock); + if (fail) + goto err; + + if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, + sizeof(last_handshake), &last_handshake) || + nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + peer->persistent_keepalive_interval) || + nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, + WGPEER_A_UNSPEC) || + nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) + goto err; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr4), + &peer->endpoint.addr4); + else if (peer->endpoint.addr.sa_family == AF_INET6) + fail = nla_put(skb, WGPEER_A_ENDPOINT, + sizeof(peer->endpoint.addr6), + &peer->endpoint.addr6); + read_unlock_bh(&peer->endpoint_lock); + if (fail) + goto err; + allowedips_node = + list_first_entry_or_null(&peer->allowedips_list, + struct allowedips_node, peer_list); + } + if (!allowedips_node) + goto no_allowedips; + if (!ctx->allowedips_seq) + ctx->allowedips_seq = peer->device->peer_allowedips.seq; + else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq) + goto no_allowedips; + + allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); + if (!allowedips_nest) + goto err; + + list_for_each_entry_from(allowedips_node, &peer->allowedips_list, + peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family; + + family = wg_allowedips_read_node(allowedips_node, ip, &cidr); + if (get_allowedips(skb, ip, cidr, family)) { + nla_nest_end(skb, allowedips_nest); + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = allowedips_node; + return -EMSGSIZE; + } + } + nla_nest_end(skb, allowedips_nest); +no_allowedips: + nla_nest_end(skb, peer_nest); + ctx->next_allowedip = NULL; + ctx->allowedips_seq = 0; + return 0; +err: + nla_nest_cancel(skb, peer_nest); + return -EMSGSIZE; +} + +static int wg_get_device_start(struct netlink_callback *cb) +{ + struct wg_device *wg; + + wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb); + if (IS_ERR(wg)) + return PTR_ERR(wg); + DUMP_CTX(cb)->wg = wg; + return 0; +} + +static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct wg_peer *peer, *next_peer_cursor; + struct dump_ctx *ctx = DUMP_CTX(cb); + struct wg_device *wg = 
ctx->wg; + struct nlattr *peers_nest; + int ret = -EMSGSIZE; + bool done = true; + void *hdr; + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + cb->seq = wg->device_update_gen; + next_peer_cursor = ctx->next_peer; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); + if (!hdr) + goto out; + genl_dump_check_consistent(cb, hdr); + + if (!ctx->next_peer) { + if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, + wg->incoming_port) || + nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || + nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || + nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) + goto out; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity) { + if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_private) || + nla_put(skb, WGDEVICE_A_PUBLIC_KEY, + NOISE_PUBLIC_KEY_LEN, + wg->static_identity.static_public)) { + up_read(&wg->static_identity.lock); + goto out; + } + } + up_read(&wg->static_identity.lock); + } + + peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); + if (!peers_nest) + goto out; + ret = 0; + /* If the last cursor was removed via list_del_init in peer_remove, then + * we just treat this the same as there being no more peers left. The + * reason is that seq_nr should indicate to userspace that this isn't a + * coherent dump anyway, so they'll try again. + */ + if (list_empty(&wg->peer_list) || + (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) { + nla_nest_cancel(skb, peers_nest); + goto out; + } + lockdep_assert_held(&wg->device_update_lock); + peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); + list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { + if (get_peer(peer, skb, ctx)) { + done = false; + break; + } + next_peer_cursor = peer; + } + nla_nest_end(skb, peers_nest); + +out: + if (!ret && !done && next_peer_cursor) + wg_peer_get(next_peer_cursor); + wg_peer_put(ctx->next_peer); + mutex_unlock(&wg->device_update_lock); + rtnl_unlock(); + + if (ret) { + genlmsg_cancel(skb, hdr); + return ret; + } + genlmsg_end(skb, hdr); + if (done) { + ctx->next_peer = NULL; + return 0; + } + ctx->next_peer = next_peer_cursor; + return skb->len; + + /* At this point, we can't really deal ourselves with safely zeroing out + * the private key material after usage. This will need an additional API + * in the kernel for marking skbs as zero_on_free. 
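+ *
+ * [Editor's note: resumption of an oversized dump works through the
+ * cursors in struct dump_ctx: when a peer or allowed-ip entry does
+ * not fit, get_peer() fails with -EMSGSIZE, next_peer and
+ * next_allowedip record where to pick up, and the next read of the
+ * multipart dump continues from there.]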
+ */ +} + +static int wg_get_device_done(struct netlink_callback *cb) +{ + struct dump_ctx *ctx = DUMP_CTX(cb); + + if (ctx->wg) + dev_put(ctx->wg->dev); + wg_peer_put(ctx->next_peer); + return 0; +} + +static int set_port(struct wg_device *wg, u16 port) +{ + struct wg_peer *peer; + + if (wg->incoming_port == port) + return 0; + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + if (!netif_running(wg->dev)) { + wg->incoming_port = port; + return 0; + } + return wg_socket_init(wg, port); +} + +static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) +{ + int ret = -EINVAL; + u16 family; + u8 cidr; + + if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || + !attrs[WGALLOWEDIP_A_CIDR_MASK]) + return ret; + family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); + cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); + + if (family == AF_INET && cidr <= 32 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) + ret = wg_allowedips_insert_v4( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + else if (family == AF_INET6 && cidr <= 128 && + nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) + ret = wg_allowedips_insert_v6( + &peer->device->peer_allowedips, + nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, + &peer->device->device_update_lock); + + return ret; +} + +static int set_peer(struct wg_device *wg, struct nlattr **attrs) +{ + u8 *public_key = NULL, *preshared_key = NULL; + struct wg_peer *peer = NULL; + u32 flags = 0; + int ret; + + ret = -EINVAL; + if (attrs[WGPEER_A_PUBLIC_KEY] && + nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) + public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); + else + goto out; + if (attrs[WGPEER_A_PRESHARED_KEY] && + nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) + preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); + + if (attrs[WGPEER_A_FLAGS]) + flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGPEER_F_ALL) + goto out; + + ret = -EPFNOSUPPORT; + if (attrs[WGPEER_A_PROTOCOL_VERSION]) { + if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) + goto out; + } + + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, + nla_data(attrs[WGPEER_A_PUBLIC_KEY])); + ret = 0; + if (!peer) { /* Peer doesn't exist yet. Add a new one. */ + if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) + goto out; + + /* The peer is new, so there aren't allowed IPs to remove. */ + flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity && + !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), + wg->static_identity.static_public, + NOISE_PUBLIC_KEY_LEN)) { + /* We silently ignore peers that have the same public + * key as the device. The reason we do it silently is + * that we'd like for people to be able to reuse the + * same set of API calls across peers. + */ + up_read(&wg->static_identity.lock); + ret = 0; + goto out; + } + up_read(&wg->static_identity.lock); + + peer = wg_peer_create(wg, public_key, preshared_key); + if (IS_ERR(peer)) { + /* Similar to the above, if the key is invalid, we skip + * it without fanfare, so that services don't need to + * worry about doing key validation themselves. + */ + ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); + peer = NULL; + goto out; + } + /* Take additional reference, as though we've just been + * looked up. 
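+ *
+ * [Editor's note: this keeps the two paths symmetric; whether the
+ * peer came from the hashtable lookup or was just created, the
+ * caller now holds one extra reference, which the single
+ * wg_peer_put() at the out label releases.]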
+ */ + wg_peer_get(peer); + } + + if (flags & WGPEER_F_REMOVE_ME) { + wg_peer_remove(peer); + goto out; + } + + if (preshared_key) { + down_write(&peer->handshake.lock); + memcpy(&peer->handshake.preshared_key, preshared_key, + NOISE_SYMMETRIC_KEY_LEN); + up_write(&peer->handshake.lock); + } + + if (attrs[WGPEER_A_ENDPOINT]) { + struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); + size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); + + if ((len == sizeof(struct sockaddr_in) && + addr->sa_family == AF_INET) || + (len == sizeof(struct sockaddr_in6) && + addr->sa_family == AF_INET6)) { + struct endpoint endpoint = { { { 0 } } }; + + memcpy(&endpoint.addr, addr, len); + wg_socket_set_peer_endpoint(peer, &endpoint); + } + } + + if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) + wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, + &wg->device_update_lock); + + if (attrs[WGPEER_A_ALLOWEDIPS]) { + struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; + int rem; + + nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { + ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, + attr, allowedip_policy, NULL); + if (ret < 0) + goto out; + ret = set_allowedip(peer, allowedip); + if (ret < 0) + goto out; + } + } + + if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { + const u16 persistent_keepalive_interval = nla_get_u16( + attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); + const bool send_keepalive = + !peer->persistent_keepalive_interval && + persistent_keepalive_interval && + netif_running(wg->dev); + + peer->persistent_keepalive_interval = persistent_keepalive_interval; + if (send_keepalive) + wg_packet_send_keepalive(peer); + } + + if (netif_running(wg->dev)) + wg_packet_send_staged_packets(peer); + +out: + wg_peer_put(peer); + if (attrs[WGPEER_A_PRESHARED_KEY]) + memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), + nla_len(attrs[WGPEER_A_PRESHARED_KEY])); + return ret; +} + +static int wg_set_device(struct sk_buff *skb, struct genl_info *info) +{ + struct wg_device *wg = lookup_interface(info->attrs, skb); + u32 flags = 0; + int ret; + + if (IS_ERR(wg)) { + ret = PTR_ERR(wg); + goto out_nodev; + } + + rtnl_lock(); + mutex_lock(&wg->device_update_lock); + + if (info->attrs[WGDEVICE_A_FLAGS]) + flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); + ret = -EOPNOTSUPP; + if (flags & ~__WGDEVICE_F_ALL) + goto out; + + ret = -EPERM; + if ((info->attrs[WGDEVICE_A_LISTEN_PORT] || + info->attrs[WGDEVICE_A_FWMARK]) && + !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN)) + goto out; + + ++wg->device_update_gen; + + if (info->attrs[WGDEVICE_A_FWMARK]) { + struct wg_peer *peer; + + wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); + } + + if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { + ret = set_port(wg, + nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); + if (ret) + goto out; + } + + if (flags & WGDEVICE_F_REPLACE_PEERS) + wg_peer_remove_all(wg); + + if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && + nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == + NOISE_PUBLIC_KEY_LEN) { + u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); + u8 public_key[NOISE_PUBLIC_KEY_LEN]; + struct wg_peer *peer, *temp; + + if (!crypto_memneq(wg->static_identity.static_private, + private_key, NOISE_PUBLIC_KEY_LEN)) + goto skip_set_private_key; + + /* We remove before setting, to prevent race, which means doing + * two 25519-genpub ops. 
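+ *
+ * [Editor's note: the first generation is the call just below, used
+ * to find and remove any existing peer whose public key matches the
+ * new identity; the second happens inside
+ * wg_noise_set_static_identity_private_key(), which derives the
+ * device's public key from the new private key.]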
+ */
+ if (curve25519_generate_public(public_key, private_key)) {
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
+ public_key);
+ if (peer) {
+ wg_peer_put(peer);
+ wg_peer_remove(peer);
+ }
+ }
+
+ down_write(&wg->static_identity.lock);
+ wg_noise_set_static_identity_private_key(&wg->static_identity,
+ private_key);
+ list_for_each_entry_safe(peer, temp, &wg->peer_list,
+ peer_list) {
+ BUG_ON(!wg_noise_precompute_static_static(peer));
+ wg_noise_expire_current_peer_keypairs(peer);
+ }
+ wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
+ up_write(&wg->static_identity.lock);
+ }
+skip_set_private_key:
+
+ if (info->attrs[WGDEVICE_A_PEERS]) {
+ struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
+ ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
+ peer_policy, NULL);
+ if (ret < 0)
+ goto out;
+ ret = set_peer(wg, peer);
+ if (ret < 0)
+ goto out;
+ }
+ }
+ ret = 0;
+
+out:
+ mutex_unlock(&wg->device_update_lock);
+ rtnl_unlock();
+ dev_put(wg->dev);
+out_nodev:
+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
+ memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
+ return ret;
+}
+
+static const struct genl_ops genl_ops[] = {
+ {
+ .cmd = WG_CMD_GET_DEVICE,
+ .start = wg_get_device_start,
+ .dumpit = wg_get_device_dump,
+ .done = wg_get_device_done,
+ .flags = GENL_UNS_ADMIN_PERM
+ }, {
+ .cmd = WG_CMD_SET_DEVICE,
+ .doit = wg_set_device,
+ .flags = GENL_UNS_ADMIN_PERM
+ }
+};
+
+static struct genl_family genl_family __ro_after_init = {
+ .ops = genl_ops,
+ .n_ops = ARRAY_SIZE(genl_ops),
+ .name = WG_GENL_NAME,
+ .version = WG_GENL_VERSION,
+ .maxattr = WGDEVICE_A_MAX,
+ .module = THIS_MODULE,
+ .policy = device_policy,
+ .netnsok = true
+};
+
+int __init wg_genetlink_init(void)
+{
+ return genl_register_family(&genl_family);
+}
+
+void __exit wg_genetlink_uninit(void)
+{
+ genl_unregister_family(&genl_family);
+}
diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h
new file mode 100644
index 000000000000..15100d92e2e3
--- /dev/null
+++ b/drivers/net/wireguard/netlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _WG_NETLINK_H
+#define _WG_NETLINK_H
+
+int wg_genetlink_init(void);
+void wg_genetlink_uninit(void);
+
+#endif /* _WG_NETLINK_H */
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
new file mode 100644
index 000000000000..919d9d866446
--- /dev/null
+++ b/drivers/net/wireguard/noise.c
@@ -0,0 +1,832 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
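+ *
+ * [Editor's note on reading the Noise_IKpsk2 pattern shown below:
+ * "e" and "s" are ephemeral and static public keys sent on the wire,
+ * the two-letter tokens "es", "ss", "ee" and "se" are Curve25519 DH
+ * results mixed into the chaining key, "psk" mixes in the optional
+ * preshared key, and "{t}"/"{}" are AEAD-encrypted payloads: a
+ * TAI64N timestamp and an empty buffer, respectively.]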
+ */
+
+#include "noise.h"
+#include "device.h"
+#include "peer.h"
+#include "messages.h"
+#include "queueing.h"
+#include "peerlookup.h"
+
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <crypto/algapi.h>
+
+/* This implements Noise_IKpsk2:
+ *
+ * <- s
+ * ******
+ * -> e, es, s, ss, {t}
+ * <- e, ee, se, psk, {}
+ */
+
+static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
+static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
+static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
+static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
+static atomic64_t keypair_counter = ATOMIC64_INIT(0);
+
+void __init wg_noise_init(void)
+{
+ struct blake2s_state blake;
+
+ blake2s(handshake_init_chaining_key, handshake_name, NULL,
+ NOISE_HASH_LEN, sizeof(handshake_name), 0);
+ blake2s_init(&blake, NOISE_HASH_LEN);
+ blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
+ blake2s_update(&blake, identifier_name, sizeof(identifier_name));
+ blake2s_final(&blake, handshake_init_hash);
+}
+
+/* Must hold peer->handshake.static_identity->lock */
+bool wg_noise_precompute_static_static(struct wg_peer *peer)
+{
+ bool ret;
+
+ down_write(&peer->handshake.lock);
+ if (peer->handshake.static_identity->has_identity) {
+ ret = curve25519(
+ peer->handshake.precomputed_static_static,
+ peer->handshake.static_identity->static_private,
+ peer->handshake.remote_static);
+ } else {
+ u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
+
+ ret = curve25519(empty, empty, peer->handshake.remote_static);
+ memset(peer->handshake.precomputed_static_static, 0,
+ NOISE_PUBLIC_KEY_LEN);
+ }
+ up_write(&peer->handshake.lock);
+ return ret;
+}
+
+bool wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer)
+{
+ memset(handshake, 0, sizeof(*handshake));
+ init_rwsem(&handshake->lock);
+ handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
+ handshake->entry.peer = peer;
+ memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
+ if (peer_preshared_key)
+ memcpy(handshake->preshared_key, peer_preshared_key,
+ NOISE_SYMMETRIC_KEY_LEN);
+ handshake->static_identity = static_identity;
+ handshake->state = HANDSHAKE_ZEROED;
+ return wg_noise_precompute_static_static(peer);
+}
+
+static void handshake_zero(struct noise_handshake *handshake)
+{
+ memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
+ memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
+ memset(&handshake->hash, 0, NOISE_HASH_LEN);
+ memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
+ handshake->remote_index = 0;
+ handshake->state = HANDSHAKE_ZEROED;
+}
+
+void wg_noise_handshake_clear(struct noise_handshake *handshake)
+{
+ wg_index_hashtable_remove(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+ down_write(&handshake->lock);
+ handshake_zero(handshake);
+ up_write(&handshake->lock);
+ wg_index_hashtable_remove(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+}
+
+static struct noise_keypair *keypair_create(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
+
+ if (unlikely(!keypair))
+ return NULL;
+ keypair->internal_id = atomic64_inc_return(&keypair_counter);
+ keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
+ keypair->entry.peer = peer;
+ kref_init(&keypair->refcount);
+ return
keypair; +} + +static void keypair_free_rcu(struct rcu_head *rcu) +{ + kzfree(container_of(rcu, struct noise_keypair, rcu)); +} + +static void keypair_free_kref(struct kref *kref) +{ + struct noise_keypair *keypair = + container_of(kref, struct noise_keypair, refcount); + + net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n", + keypair->entry.peer->device->dev->name, + keypair->internal_id, + keypair->entry.peer->internal_id); + wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable, + &keypair->entry); + call_rcu(&keypair->rcu, keypair_free_rcu); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now) +{ + if (unlikely(!keypair)) + return; + if (unlikely(unreference_now)) + wg_index_hashtable_remove( + keypair->entry.peer->device->index_hashtable, + &keypair->entry); + kref_put(&keypair->refcount, keypair_free_kref); +} + +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), + "Taking noise keypair reference without holding the RCU BH read lock"); + if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount))) + return NULL; + return keypair; +} + +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs) +{ + struct noise_keypair *old; + + spin_lock_bh(&keypairs->keypair_update_lock); + + /* We zero the next_keypair before zeroing the others, so that + * wg_noise_received_with_keypair returns early before subsequent ones + * are zeroed. + */ + old = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(old, true); + + old = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + RCU_INIT_POINTER(keypairs->current_keypair, NULL); + wg_noise_keypair_put(old, true); + + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + + wg_noise_handshake_clear(&peer->handshake); + wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); + + spin_lock_bh(&peer->keypairs.keypair_update_lock); + keypair = rcu_dereference_protected(peer->keypairs.next_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + keypair = rcu_dereference_protected(peer->keypairs.current_keypair, + lockdep_is_held(&peer->keypairs.keypair_update_lock)); + if (keypair) + keypair->sending.is_valid = false; + spin_unlock_bh(&peer->keypairs.keypair_update_lock); +} + +static void add_new_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *new_keypair) +{ + struct noise_keypair *previous_keypair, *next_keypair, *current_keypair; + + spin_lock_bh(&keypairs->keypair_update_lock); + previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + next_keypair = rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + current_keypair = rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + if (new_keypair->i_am_the_initiator) { + /* If we're the initiator, it means we've sent a handshake, and 
+ * received a confirmation response, which means this new + * keypair can now be used. + */ + if (next_keypair) { + /* If there already was a next keypair pending, we + * demote it to be the previous keypair, and free the + * existing current. Note that this means KCI can result + * in this transition. It would perhaps be more sound to + * always just get rid of the unused next keypair + * instead of putting it in the previous slot, but this + * might be a bit less robust. Something to think about + * for the future. + */ + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + rcu_assign_pointer(keypairs->previous_keypair, + next_keypair); + wg_noise_keypair_put(current_keypair, true); + } else /* If there wasn't an existing next keypair, we replace + * the previous with the current one. + */ + rcu_assign_pointer(keypairs->previous_keypair, + current_keypair); + /* At this point we can get rid of the old previous keypair, and + * set up the new keypair. + */ + wg_noise_keypair_put(previous_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, new_keypair); + } else { + /* If we're the responder, it means we can't use the new keypair + * until we receive confirmation via the first data packet, so + * we get rid of the existing previous one, the possibly + * existing next one, and slide in the new next one. + */ + rcu_assign_pointer(keypairs->next_keypair, new_keypair); + wg_noise_keypair_put(next_keypair, true); + RCU_INIT_POINTER(keypairs->previous_keypair, NULL); + wg_noise_keypair_put(previous_keypair, true); + } + spin_unlock_bh(&keypairs->keypair_update_lock); +} + +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair) +{ + struct noise_keypair *old_keypair; + bool key_is_new; + + /* We first check without taking the spinlock. */ + key_is_new = received_keypair == + rcu_access_pointer(keypairs->next_keypair); + if (likely(!key_is_new)) + return false; + + spin_lock_bh(&keypairs->keypair_update_lock); + /* After locking, we double check that things didn't change from + * beneath us. + */ + if (unlikely(received_keypair != + rcu_dereference_protected(keypairs->next_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)))) { + spin_unlock_bh(&keypairs->keypair_update_lock); + return false; + } + + /* When we've finally received the confirmation, we slide the next + * into the current, the current into the previous, and get rid of + * the old previous. 
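+ *
+ * [Editor's sketch of the rotation performed below:
+ *
+ *   previous = current;  // old previous is dropped
+ *   current  = next;     // the keypair just confirmed
+ *   next     = NULL;
+ * ]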
+ */ + old_keypair = rcu_dereference_protected(keypairs->previous_keypair, + lockdep_is_held(&keypairs->keypair_update_lock)); + rcu_assign_pointer(keypairs->previous_keypair, + rcu_dereference_protected(keypairs->current_keypair, + lockdep_is_held(&keypairs->keypair_update_lock))); + wg_noise_keypair_put(old_keypair, true); + rcu_assign_pointer(keypairs->current_keypair, received_keypair); + RCU_INIT_POINTER(keypairs->next_keypair, NULL); + + spin_unlock_bh(&keypairs->keypair_update_lock); + return true; +} + +/* Must hold static_identity->lock */ +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(static_identity->static_private, private_key, + NOISE_PUBLIC_KEY_LEN); + curve25519_clamp_secret(static_identity->static_private); + static_identity->has_identity = curve25519_generate_public( + static_identity->static_public, private_key); +} + +/* This is Hugo Krawczyk's HKDF: + * - https://eprint.iacr.org/2010/264.pdf + * - https://tools.ietf.org/html/rfc5869 + */ +static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, + size_t first_len, size_t second_len, size_t third_len, + size_t data_len, const u8 chaining_key[NOISE_HASH_LEN]) +{ + u8 output[BLAKE2S_HASH_SIZE + 1]; + u8 secret[BLAKE2S_HASH_SIZE]; + + WARN_ON(IS_ENABLED(DEBUG) && + (first_len > BLAKE2S_HASH_SIZE || + second_len > BLAKE2S_HASH_SIZE || + third_len > BLAKE2S_HASH_SIZE || + ((second_len || second_dst || third_len || third_dst) && + (!first_len || !first_dst)) || + ((third_len || third_dst) && (!second_len || !second_dst)))); + + /* Extract entropy from data into secret */ + blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN); + + if (!first_dst || !first_len) + goto out; + + /* Expand first key: key = secret, data = 0x1 */ + output[0] = 1; + blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE); + memcpy(first_dst, output, first_len); + + if (!second_dst || !second_len) + goto out; + + /* Expand second key: key = secret, data = first-key || 0x2 */ + output[BLAKE2S_HASH_SIZE] = 2; + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, + BLAKE2S_HASH_SIZE); + memcpy(second_dst, output, second_len); + + if (!third_dst || !third_len) + goto out; + + /* Expand third key: key = secret, data = second-key || 0x3 */ + output[BLAKE2S_HASH_SIZE] = 3; + blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1, + BLAKE2S_HASH_SIZE); + memcpy(third_dst, output, third_len); + +out: + /* Clear sensitive data from stack */ + memzero_explicit(secret, BLAKE2S_HASH_SIZE); + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1); +} + +static void symmetric_key_init(struct noise_symmetric_key *key) +{ + spin_lock_init(&key->counter.receive.lock); + atomic64_set(&key->counter.counter, 0); + memset(key->counter.receive.backtrack, 0, + sizeof(key->counter.receive.backtrack)); + key->birthdate = ktime_get_coarse_boottime_ns(); + key->is_valid = true; +} + +static void derive_keys(struct noise_symmetric_key *first_dst, + struct noise_symmetric_key *second_dst, + const u8 chaining_key[NOISE_HASH_LEN]) +{ + kdf(first_dst->key, second_dst->key, NULL, NULL, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, + chaining_key); + symmetric_key_init(first_dst); + symmetric_key_init(second_dst); +} + +static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 private[NOISE_PUBLIC_KEY_LEN], + const u8 public[NOISE_PUBLIC_KEY_LEN]) +{ + u8 
dh_calculation[NOISE_PUBLIC_KEY_LEN]; + + if (unlikely(!curve25519(dh_calculation, private, public))) + return false; + kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key); + memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN); + return true; +} + +static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) +{ + struct blake2s_state blake; + + blake2s_init(&blake, NOISE_HASH_LEN); + blake2s_update(&blake, hash, NOISE_HASH_LEN); + blake2s_update(&blake, src, src_len); + blake2s_final(&blake, hash); +} + +static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) +{ + u8 temp_hash[NOISE_HASH_LEN]; + + kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key); + mix_hash(hash, temp_hash, NOISE_HASH_LEN); + memzero_explicit(temp_hash, NOISE_HASH_LEN); +} + +static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN], + const u8 remote_static[NOISE_PUBLIC_KEY_LEN]) +{ + memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); + memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); + mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN); +} + +static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, + NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key); + mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); +} + +static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, + size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, + hash, NOISE_HASH_LEN, + 0 /* Always zero for Noise_IK */, key)) + return false; + mix_hash(hash, src_ciphertext, src_len); + return true; +} + +static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], + const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], + u8 chaining_key[NOISE_HASH_LEN], + u8 hash[NOISE_HASH_LEN]) +{ + if (ephemeral_dst != ephemeral_src) + memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN); + kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, + NOISE_PUBLIC_KEY_LEN, chaining_key); +} + +static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN]) +{ + struct timespec64 now; + + ktime_get_real_ts64(&now); + + /* In order to prevent some sort of infoleak from precise timers, we + * round down the nanoseconds part to the closest rounded-down power of + * two to the maximum initiations per second allowed anyway by the + * implementation. + */ + now.tv_nsec = ALIGN_DOWN(now.tv_nsec, + rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND)); + + /* https://cr.yp.to/libtai/tai64.html */ + *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); + *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); +} + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake) +{ + u8 timestamp[NOISE_TIMESTAMP_LEN]; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. 
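+ * That call can sleep until the random pool is initialized, so
+ * the sleeping must happen here rather than after the semaphore
+ * acquisitions below.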
+ */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (unlikely(!handshake->static_identity->has_identity)) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION); + + handshake_init(handshake->chaining_key, handshake->hash, + handshake->remote_static); + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* es */ + if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* s */ + message_encrypt(dst->encrypted_static, + handshake->static_identity->static_public, + NOISE_PUBLIC_KEY_LEN, key, handshake->hash); + + /* ss */ + kdf(handshake->chaining_key, key, NULL, + handshake->precomputed_static_static, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + handshake->chaining_key); + + /* {t} */ + tai64n_now(timestamp); + message_encrypt(dst->encrypted_timestamp, timestamp, + NOISE_TIMESTAMP_LEN, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_INITIATION; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg) +{ + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + bool replay_attack, flood_attack; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 s[NOISE_PUBLIC_KEY_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 t[NOISE_TIMESTAMP_LEN]; + u64 initiation_consumption; + + down_read(&wg->static_identity.lock); + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake_init(chaining_key, hash, wg->static_identity.static_public); + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* es */ + if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e)) + goto out; + + /* s */ + if (!message_decrypt(s, src->encrypted_static, + sizeof(src->encrypted_static), key, hash)) + goto out; + + /* Lookup which peer we're actually talking to */ + peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); + if (!peer) + goto out; + handshake = &peer->handshake; + + /* ss */ + kdf(chaining_key, key, NULL, handshake->precomputed_static_static, + NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + chaining_key); + + /* {t} */ + if (!message_decrypt(t, src->encrypted_timestamp, + sizeof(src->encrypted_timestamp), key, hash)) + goto out; + + down_read(&handshake->lock); + replay_attack = memcmp(t, handshake->latest_timestamp, + NOISE_TIMESTAMP_LEN) <= 0; + flood_attack = (s64)handshake->last_initiation_consumption + + NSEC_PER_SEC / INITIATIONS_PER_SECOND > + (s64)ktime_get_coarse_boottime_ns(); + up_read(&handshake->lock); + if (replay_attack || flood_attack) + goto out; + + /* Success! 
Copy everything to peer */ + down_write(&handshake->lock); + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) + memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + if ((s64)(handshake->last_initiation_consumption - + (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0) + handshake->last_initiation_consumption = initiation_consumption; + handshake->state = HANDSHAKE_CONSUMED_INITIATION; + up_write(&handshake->lock); + ret_peer = peer; + +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + up_read(&wg->static_identity.lock); + if (!ret_peer) + wg_peer_put(peer); + return ret_peer; +} + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake) +{ + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + bool ret = false; + + /* We need to wait for crng _before_ taking any locks, since + * curve25519_generate_secret uses get_random_bytes_wait. + */ + wait_for_random_bytes(); + + down_read(&handshake->static_identity->lock); + down_write(&handshake->lock); + + if (handshake->state != HANDSHAKE_CONSUMED_INITIATION) + goto out; + + dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE); + dst->receiver_index = handshake->remote_index; + + /* e */ + curve25519_generate_secret(handshake->ephemeral_private); + if (!curve25519_generate_public(dst->unencrypted_ephemeral, + handshake->ephemeral_private)) + goto out; + message_ephemeral(dst->unencrypted_ephemeral, + dst->unencrypted_ephemeral, handshake->chaining_key, + handshake->hash); + + /* ee */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_ephemeral)) + goto out; + + /* se */ + if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, + handshake->remote_static)) + goto out; + + /* psk */ + mix_psk(handshake->chaining_key, handshake->hash, key, + handshake->preshared_key); + + /* {} */ + message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); + + dst->sender_index = wg_index_hashtable_insert( + handshake->entry.peer->device->index_hashtable, + &handshake->entry); + + handshake->state = HANDSHAKE_CREATED_RESPONSE; + ret = true; + +out: + up_write(&handshake->lock); + up_read(&handshake->static_identity->lock); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + return ret; +} + +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg) +{ + enum noise_handshake_state state = HANDSHAKE_ZEROED; + struct wg_peer *peer = NULL, *ret_peer = NULL; + struct noise_handshake *handshake; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + u8 e[NOISE_PUBLIC_KEY_LEN]; + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + + down_read(&wg->static_identity.lock); + + if (unlikely(!wg->static_identity.has_identity)) + goto out; + + handshake = (struct noise_handshake *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, + src->receiver_index, &peer); + if (unlikely(!handshake)) + goto out; + + down_read(&handshake->lock); + state = handshake->state; + memcpy(hash, handshake->hash, NOISE_HASH_LEN); + memcpy(chaining_key, 
handshake->chaining_key, NOISE_HASH_LEN); + memcpy(ephemeral_private, handshake->ephemeral_private, + NOISE_PUBLIC_KEY_LEN); + up_read(&handshake->lock); + + if (state != HANDSHAKE_CREATED_INITIATION) + goto fail; + + /* e */ + message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash); + + /* ee */ + if (!mix_dh(chaining_key, NULL, ephemeral_private, e)) + goto fail; + + /* se */ + if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e)) + goto fail; + + /* psk */ + mix_psk(chaining_key, hash, key, handshake->preshared_key); + + /* {} */ + if (!message_decrypt(NULL, src->encrypted_nothing, + sizeof(src->encrypted_nothing), key, hash)) + goto fail; + + /* Success! Copy everything to peer */ + down_write(&handshake->lock); + /* It's important to check that the state is still the same, while we + * have an exclusive lock. + */ + if (handshake->state != state) { + up_write(&handshake->lock); + goto fail; + } + memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN); + memcpy(handshake->hash, hash, NOISE_HASH_LEN); + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN); + handshake->remote_index = src->sender_index; + handshake->state = HANDSHAKE_CONSUMED_RESPONSE; + up_write(&handshake->lock); + ret_peer = peer; + goto out; + +fail: + wg_peer_put(peer); +out: + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(hash, NOISE_HASH_LEN); + memzero_explicit(chaining_key, NOISE_HASH_LEN); + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN); + memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN); + up_read(&wg->static_identity.lock); + return ret_peer; +} + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs) +{ + struct noise_keypair *new_keypair; + bool ret = false; + + down_write(&handshake->lock); + if (handshake->state != HANDSHAKE_CREATED_RESPONSE && + handshake->state != HANDSHAKE_CONSUMED_RESPONSE) + goto out; + + new_keypair = keypair_create(handshake->entry.peer); + if (!new_keypair) + goto out; + new_keypair->i_am_the_initiator = handshake->state == + HANDSHAKE_CONSUMED_RESPONSE; + new_keypair->remote_index = handshake->remote_index; + + if (new_keypair->i_am_the_initiator) + derive_keys(&new_keypair->sending, &new_keypair->receiving, + handshake->chaining_key); + else + derive_keys(&new_keypair->receiving, &new_keypair->sending, + handshake->chaining_key); + + handshake_zero(handshake); + rcu_read_lock_bh(); + if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, + handshake)->is_dead))) { + add_new_keypair(keypairs, new_keypair); + net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n", + handshake->entry.peer->device->dev->name, + new_keypair->internal_id, + handshake->entry.peer->internal_id); + ret = wg_index_hashtable_replace( + handshake->entry.peer->device->index_hashtable, + &handshake->entry, &new_keypair->entry); + } else { + kzfree(new_keypair); + } + rcu_read_unlock_bh(); + +out: + up_write(&handshake->lock); + return ret; +} diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h new file mode 100644 index 000000000000..138a07bb817c --- /dev/null +++ b/drivers/net/wireguard/noise.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */
+#ifndef _WG_NOISE_H
+#define _WG_NOISE_H
+
+#include "messages.h"
+#include "peerlookup.h"
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+
+union noise_counter {
+ struct {
+ u64 counter;
+ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
+ spinlock_t lock;
+ } receive;
+ atomic64_t counter;
+};
+
+struct noise_symmetric_key {
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ union noise_counter counter;
+ u64 birthdate;
+ bool is_valid;
+};
+
+struct noise_keypair {
+ struct index_hashtable_entry entry;
+ struct noise_symmetric_key sending;
+ struct noise_symmetric_key receiving;
+ __le32 remote_index;
+ bool i_am_the_initiator;
+ struct kref refcount;
+ struct rcu_head rcu;
+ u64 internal_id;
+};
+
+struct noise_keypairs {
+ struct noise_keypair __rcu *current_keypair;
+ struct noise_keypair __rcu *previous_keypair;
+ struct noise_keypair __rcu *next_keypair;
+ spinlock_t keypair_update_lock;
+};
+
+struct noise_static_identity {
+ u8 static_public[NOISE_PUBLIC_KEY_LEN];
+ u8 static_private[NOISE_PUBLIC_KEY_LEN];
+ struct rw_semaphore lock;
+ bool has_identity;
+};
+
+enum noise_handshake_state {
+ HANDSHAKE_ZEROED,
+ HANDSHAKE_CREATED_INITIATION,
+ HANDSHAKE_CONSUMED_INITIATION,
+ HANDSHAKE_CREATED_RESPONSE,
+ HANDSHAKE_CONSUMED_RESPONSE
+};
+
+struct noise_handshake {
+ struct index_hashtable_entry entry;
+
+ enum noise_handshake_state state;
+ u64 last_initiation_consumption;
+
+ struct noise_static_identity *static_identity;
+
+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+ u8 remote_static[NOISE_PUBLIC_KEY_LEN];
+ u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
+
+ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
+
+ u8 hash[NOISE_HASH_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+
+ u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
+ __le32 remote_index;
+
+ /* Protects all members except the immutable (after noise_handshake_
+ * init): remote_static, precomputed_static_static, static_identity.
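+ * Handshake state readers in noise.c take this for read to
+ * snapshot the fields above; the create/consume paths take it
+ * for write while mutating them.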
+ */ + struct rw_semaphore lock; +}; + +struct wg_device; + +void wg_noise_init(void); +bool wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer); +void wg_noise_handshake_clear(struct noise_handshake *handshake); +static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) +{ + atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - + (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC); +} + +void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now); +struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair); +void wg_noise_keypairs_clear(struct noise_keypairs *keypairs); +bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs, + struct noise_keypair *received_keypair); +void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); + +void wg_noise_set_static_identity_private_key( + struct noise_static_identity *static_identity, + const u8 private_key[NOISE_PUBLIC_KEY_LEN]); +bool wg_noise_precompute_static_static(struct wg_peer *peer); + +bool +wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, + struct wg_device *wg); + +bool wg_noise_handshake_create_response(struct message_handshake_response *dst, + struct noise_handshake *handshake); +struct wg_peer * +wg_noise_handshake_consume_response(struct message_handshake_response *src, + struct wg_device *wg); + +bool wg_noise_handshake_begin_session(struct noise_handshake *handshake, + struct noise_keypairs *keypairs); + +#endif /* _WG_NOISE_H */ diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c new file mode 100644 index 000000000000..071eedf33f5a --- /dev/null +++ b/drivers/net/wireguard/peer.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */
+
+#include "peer.h"
+#include "device.h"
+#include "queueing.h"
+#include "timers.h"
+#include "peerlookup.h"
+#include "noise.h"
+
+#include <linux/kref.h>
+#include <linux/lockdep.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
+static atomic64_t peer_counter = ATOMIC64_INIT(0);
+
+struct wg_peer *wg_peer_create(struct wg_device *wg,
+ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
+{
+ struct wg_peer *peer;
+ int ret = -ENOMEM;
+
+ lockdep_assert_held(&wg->device_update_lock);
+
+ if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
+ return ERR_PTR(ret);
+
+ peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+ if (unlikely(!peer))
+ return ERR_PTR(ret);
+ peer->device = wg;
+
+ if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
+ public_key, preshared_key, peer)) {
+ ret = -EKEYREJECTED;
+ goto err_1;
+ }
+ if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+ goto err_1;
+ if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
+ MAX_QUEUED_PACKETS))
+ goto err_2;
+ if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
+ MAX_QUEUED_PACKETS))
+ goto err_3;
+
+ peer->internal_id = atomic64_inc_return(&peer_counter);
+ peer->serial_work_cpu = nr_cpumask_bits;
+ wg_cookie_init(&peer->latest_cookie);
+ wg_timers_init(peer);
+ wg_cookie_checker_precompute_peer_keys(peer);
+ spin_lock_init(&peer->keypairs.keypair_update_lock);
+ INIT_WORK(&peer->transmit_handshake_work,
+ wg_packet_handshake_send_worker);
+ rwlock_init(&peer->endpoint_lock);
+ kref_init(&peer->refcount);
+ skb_queue_head_init(&peer->staged_packet_queue);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
+ netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&peer->napi);
+ list_add_tail(&peer->peer_list, &wg->peer_list);
+ INIT_LIST_HEAD(&peer->allowedips_list);
+ wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
+ ++wg->num_peers;
+ pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
+ return peer;
+
+err_3:
+ wg_packet_queue_free(&peer->tx_queue, false);
+err_2:
+ dst_cache_destroy(&peer->endpoint_cache);
+err_1:
+ kfree(peer);
+ return ERR_PTR(ret);
+}
+
+struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
+ "Taking peer reference without holding the RCU read lock");
+ if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
+ return NULL;
+ return peer;
+}
+
+static void peer_make_dead(struct wg_peer *peer)
+{
+ /* Remove from configuration-time lookup structures. */
+ list_del_init(&peer->peer_list);
+ wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
+ &peer->device->device_update_lock);
+ wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
+
+ /* Mark as dead, so that we don't allow jumping contexts after. */
+ WRITE_ONCE(peer->is_dead, true);
+
+ /* The caller must now synchronize_rcu() for this to take effect. */
+}
+
+static void peer_remove_after_dead(struct wg_peer *peer)
+{
+ WARN_ON(!peer->is_dead);
+
+ /* No more keypairs can be created for this peer, since is_dead protects
+ * add_new_keypair, so we can now destroy existing ones.
+ */
+ wg_noise_keypairs_clear(&peer->keypairs);
+
+ /* Destroy all ongoing timers that were in-flight at the beginning of
+ * this function.
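+ * Stopping them synchronously guarantees that none of them is
+ * still mid-flight by the time the queue flushes below run.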
+ */
+ wg_timers_stop(peer);
+
+ /* The transition between packet encryption/decryption queues isn't
+ * guarded by is_dead, but each reference's life is strictly bounded by
+ * two generations: once for parallel crypto and once for serial
+ * ingestion, so we can simply flush twice, and be sure that we no
+ * longer have references inside these queues.
+ */
+
+ /* a) For encrypt/decrypt. */
+ flush_workqueue(peer->device->packet_crypt_wq);
+ /* b.1) For send (but not receive, since that's napi). */
+ flush_workqueue(peer->device->packet_crypt_wq);
+ /* b.2.1) For receive (but not send, since that's wq). */
+ napi_disable(&peer->napi);
+ /* b.2.2) It's now safe to remove the napi struct, which must be done
+ * here from process context.
+ */
+ netif_napi_del(&peer->napi);
+
+ /* Ensure any workstructs we own (like transmit_handshake_work or
+ * clear_peer_work) are no longer in use.
+ */
+ flush_workqueue(peer->device->handshake_send_wq);
+
+ /* After the above flushes, a peer might still be active in a few
+ * different contexts: 1) from xmit(), before hitting is_dead and
+ * returning, 2) from wg_packet_consume_data(), before hitting is_dead
+ * and returning, 3) from wg_receive_handshake_packet() after a point
+ * where it has processed an incoming handshake packet, but where
+ * all calls to pass it off to timers fail because of is_dead. We won't
+ * have new references in (1) eventually, because we're removed from
+ * allowedips; we won't have new references in (2) eventually, because
+ * wg_index_hashtable_lookup will always return NULL, since we removed
+ * all existing keypairs and no more can be created; we won't have new
+ * references in (3) eventually, because we're removed from the pubkey
+ * hash table, which allows for a maximum of one handshake response,
+ * via the still-uncleared index hashtable entry, but not more than one,
+ * and in wg_cookie_message_consume, the lookup eventually gets a peer
+ * with a refcount of zero, so no new reference is taken.
+ */
+
+ --peer->device->num_peers;
+ wg_peer_put(peer);
+}
+
+/* We have a separate "remove" function to make sure that all active places
+ * where a peer is currently operating will eventually come to an end and
+ * not pass their reference onto another context.
+ */
+void wg_peer_remove(struct wg_peer *peer)
+{
+ if (unlikely(!peer))
+ return;
+ lockdep_assert_held(&peer->device->device_update_lock);
+
+ peer_make_dead(peer);
+ synchronize_rcu();
+ peer_remove_after_dead(peer);
+}
+
+void wg_peer_remove_all(struct wg_device *wg)
+{
+ struct wg_peer *peer, *temp;
+ LIST_HEAD(dead_peers);
+
+ lockdep_assert_held(&wg->device_update_lock);
+
+ /* Avoid having to traverse individually for each one. */
+ wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
+
+ list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
+ peer_make_dead(peer);
+ list_add_tail(&peer->peer_list, &dead_peers);
+ }
+ synchronize_rcu();
+ list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
+ peer_remove_after_dead(peer);
+}
+
+static void rcu_release(struct rcu_head *rcu)
+{
+ struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
+
+ dst_cache_destroy(&peer->endpoint_cache);
+ wg_packet_queue_free(&peer->rx_queue, false);
+ wg_packet_queue_free(&peer->tx_queue, false);
+
+ /* The final zeroing takes care of clearing any remaining handshake key
+ * material and other potentially sensitive information.
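+ * (This is what kzfree(), as opposed to plain kfree(), provides:
+ * the allocation is zeroed before being handed back to the
+ * allocator.)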
+ */ + kzfree(peer); +} + +static void kref_release(struct kref *refcount) +{ + struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount); + + pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + /* Remove ourself from dynamic runtime lookup structures, now that the + * last reference is gone. + */ + wg_index_hashtable_remove(peer->device->index_hashtable, + &peer->handshake.entry); + + /* Remove any lingering packets that didn't have a chance to be + * transmitted. + */ + wg_packet_purge_staged_packets(peer); + + /* Free the memory used. */ + call_rcu(&peer->rcu, rcu_release); +} + +void wg_peer_put(struct wg_peer *peer) +{ + if (unlikely(!peer)) + return; + kref_put(&peer->refcount, kref_release); +} diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h new file mode 100644 index 000000000000..23af40922997 --- /dev/null +++ b/drivers/net/wireguard/peer.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_PEER_H +#define _WG_PEER_H + +#include "device.h" +#include "noise.h" +#include "cookie.h" + +#include +#include +#include +#include +#include + +struct wg_device; + +struct endpoint { + union { + struct sockaddr addr; + struct sockaddr_in addr4; + struct sockaddr_in6 addr6; + }; + union { + struct { + struct in_addr src4; + /* Essentially the same as addr6->scope_id */ + int src_if4; + }; + struct in6_addr src6; + }; +}; + +struct wg_peer { + struct wg_device *device; + struct crypt_queue tx_queue, rx_queue; + struct sk_buff_head staged_packet_queue; + int serial_work_cpu; + struct noise_keypairs keypairs; + struct endpoint endpoint; + struct dst_cache endpoint_cache; + rwlock_t endpoint_lock; + struct noise_handshake handshake; + atomic64_t last_sent_handshake; + struct work_struct transmit_handshake_work, clear_peer_work; + struct cookie latest_cookie; + struct hlist_node pubkey_hash; + u64 rx_bytes, tx_bytes; + struct timer_list timer_retransmit_handshake, timer_send_keepalive; + struct timer_list timer_new_handshake, timer_zero_key_material; + struct timer_list timer_persistent_keepalive; + unsigned int timer_handshake_attempts; + u16 persistent_keepalive_interval; + bool timer_need_another_keepalive; + bool sent_lastminute_handshake; + struct timespec64 walltime_last_handshake; + struct kref refcount; + struct rcu_head rcu; + struct list_head peer_list; + struct list_head allowedips_list; + u64 internal_id; + struct napi_struct napi; + bool is_dead; +}; + +struct wg_peer *wg_peer_create(struct wg_device *wg, + const u8 public_key[NOISE_PUBLIC_KEY_LEN], + const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]); + +struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer); +static inline struct wg_peer *wg_peer_get(struct wg_peer *peer) +{ + kref_get(&peer->refcount); + return peer; +} +void wg_peer_put(struct wg_peer *peer); +void wg_peer_remove(struct wg_peer *peer); +void wg_peer_remove_all(struct wg_device *wg); + +#endif /* _WG_PEER_H */ diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c new file mode 100644 index 000000000000..e4deb331476b --- /dev/null +++ b/drivers/net/wireguard/peerlookup.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#include "peerlookup.h" +#include "peer.h" +#include "noise.h" + +static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + /* siphash gives us a secure 64bit number based on a random key. Since + * the bits are uniformly distributed, we can then mask off to get the + * bits we need. + */ + const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key); + + return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)]; +} + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void) +{ + struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + get_random_bytes(&table->key, sizeof(table->key)); + hash_init(table->hashtable); + mutex_init(&table->lock); + return table; +} + +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_add_head_rcu(&peer->pubkey_hash, + pubkey_bucket(table, peer->handshake.remote_static)); + mutex_unlock(&table->lock); +} + +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer) +{ + mutex_lock(&table->lock); + hlist_del_init_rcu(&peer->pubkey_hash); + mutex_unlock(&table->lock); +} + +/* Returns a strong reference to a peer */ +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]) +{ + struct wg_peer *iter_peer, *peer = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey), + pubkey_hash) { + if (!memcmp(pubkey, iter_peer->handshake.remote_static, + NOISE_PUBLIC_KEY_LEN)) { + peer = iter_peer; + break; + } + } + peer = wg_peer_get_maybe_zero(peer); + rcu_read_unlock_bh(); + return peer; +} + +static struct hlist_head *index_bucket(struct index_hashtable *table, + const __le32 index) +{ + /* Since the indices are random and thus all bits are uniformly + * distributed, we can find its bucket simply by masking. + */ + return &table->hashtable[(__force u32)index & + (HASH_SIZE(table->hashtable) - 1)]; +} + +struct index_hashtable *wg_index_hashtable_alloc(void) +{ + struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL); + + if (!table) + return NULL; + + hash_init(table->hashtable); + spin_lock_init(&table->lock); + return table; +} + +/* At the moment, we limit ourselves to 2^20 total peers, which generally might + * amount to 2^20*3 items in this hashtable. The algorithm below works by + * picking a random number and testing it. We can see that these limits mean we + * usually succeed pretty quickly: + * + * >>> def calculation(tries, size): + * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32)) + * ... + * >>> calculation(1, 2**20 * 3) + * 0.999267578125 + * >>> calculation(2, 2**20 * 3) + * 0.0007318854331970215 + * >>> calculation(3, 2**20 * 3) + * 5.360489012673497e-07 + * >>> calculation(4, 2**20 * 3) + * 3.9261394135792216e-10 + * + * At the moment, we don't do any masking, so this algorithm isn't exactly + * constant time in either the random guessing or in the hash list lookup. We + * could require a minimum of 3 tries, which would successfully mask the + * guessing. this would not, however, help with the growing hash lengths, which + * is another thing to consider moving forward. 
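+ *
+ * Relatedly, since each try succeeds independently with
+ * probability calculation(1, 2**20 * 3) above, the expected
+ * number of tries is the reciprocal of that value, i.e. just
+ * over 1.0007 at the current limits.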
+ */ + +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + struct index_hashtable_entry *existing_entry; + + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); + + rcu_read_lock_bh(); + +search_unused_slot: + /* First we try to find an unused slot, randomly, while unlocked. */ + entry->index = (__force __le32)get_random_u32(); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) + /* If it's already in use, we continue searching. */ + goto search_unused_slot; + } + + /* Once we've found an unused slot, we lock it, and then double-check + * that nobody else stole it from us. + */ + spin_lock_bh(&table->lock); + hlist_for_each_entry_rcu_bh(existing_entry, + index_bucket(table, entry->index), + index_hash) { + if (existing_entry->index == entry->index) { + spin_unlock_bh(&table->lock); + /* If it was stolen, we start over. */ + goto search_unused_slot; + } + } + /* Otherwise, we know we have it exclusively (since we're locked), + * so we insert. + */ + hlist_add_head_rcu(&entry->index_hash, + index_bucket(table, entry->index)); + spin_unlock_bh(&table->lock); + + rcu_read_unlock_bh(); + + return entry->index; +} + +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new) +{ + if (unlikely(hlist_unhashed(&old->index_hash))) + return false; + spin_lock_bh(&table->lock); + new->index = old->index; + hlist_replace_rcu(&old->index_hash, &new->index_hash); + + /* Calling init here NULLs out index_hash, and in fact after this + * function returns, it's theoretically possible for this to get + * reinserted elsewhere. That means the RCU lookup below might either + * terminate early or jump between buckets, in which case the packet + * simply gets dropped, which isn't terrible. + */ + INIT_HLIST_NODE(&old->index_hash); + spin_unlock_bh(&table->lock); + return true; +} + +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry) +{ + spin_lock_bh(&table->lock); + hlist_del_init_rcu(&entry->index_hash); + spin_unlock_bh(&table->lock); +} + +/* Returns a strong reference to a entry->peer */ +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer) +{ + struct index_hashtable_entry *iter_entry, *entry = NULL; + + rcu_read_lock_bh(); + hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index), + index_hash) { + if (iter_entry->index == index) { + if (likely(iter_entry->type & type_mask)) + entry = iter_entry; + break; + } + } + if (likely(entry)) { + entry->peer = wg_peer_get_maybe_zero(entry->peer); + if (likely(entry->peer)) + *peer = entry->peer; + else + entry = NULL; + } + rcu_read_unlock_bh(); + return entry; +} diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h new file mode 100644 index 000000000000..ced811797680 --- /dev/null +++ b/drivers/net/wireguard/peerlookup.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#ifndef _WG_PEERLOOKUP_H +#define _WG_PEERLOOKUP_H + +#include "messages.h" + +#include +#include +#include + +struct wg_peer; + +struct pubkey_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 11); + siphash_key_t key; + struct mutex lock; +}; + +struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void); +void wg_pubkey_hashtable_add(struct pubkey_hashtable *table, + struct wg_peer *peer); +void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table, + struct wg_peer *peer); +struct wg_peer * +wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table, + const u8 pubkey[NOISE_PUBLIC_KEY_LEN]); + +struct index_hashtable { + /* TODO: move to rhashtable */ + DECLARE_HASHTABLE(hashtable, 13); + spinlock_t lock; +}; + +enum index_hashtable_type { + INDEX_HASHTABLE_HANDSHAKE = 1U << 0, + INDEX_HASHTABLE_KEYPAIR = 1U << 1 +}; + +struct index_hashtable_entry { + struct wg_peer *peer; + struct hlist_node index_hash; + enum index_hashtable_type type; + __le32 index; +}; + +struct index_hashtable *wg_index_hashtable_alloc(void); +__le32 wg_index_hashtable_insert(struct index_hashtable *table, + struct index_hashtable_entry *entry); +bool wg_index_hashtable_replace(struct index_hashtable *table, + struct index_hashtable_entry *old, + struct index_hashtable_entry *new); +void wg_index_hashtable_remove(struct index_hashtable *table, + struct index_hashtable_entry *entry); +struct index_hashtable_entry * +wg_index_hashtable_lookup(struct index_hashtable *table, + const enum index_hashtable_type type_mask, + const __le32 index, struct wg_peer **peer); + +#endif /* _WG_PEERLOOKUP_H */ diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c new file mode 100644 index 000000000000..5c964fcb994e --- /dev/null +++ b/drivers/net/wireguard/queueing.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" + +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) +{ + int cpu; + struct multicore_worker __percpu *worker = + alloc_percpu(struct multicore_worker); + + if (!worker) + return NULL; + + for_each_possible_cpu(cpu) { + per_cpu_ptr(worker, cpu)->ptr = ptr; + INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function); + } + return worker; +} + +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + bool multicore, unsigned int len) +{ + int ret; + + memset(queue, 0, sizeof(*queue)); + ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); + if (ret) + return ret; + if (function) { + if (multicore) { + queue->worker = wg_packet_percpu_multicore_worker_alloc( + function, queue); + if (!queue->worker) + return -ENOMEM; + } else { + INIT_WORK(&queue->work, function); + } + } + return 0; +} + +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) +{ + if (multicore) + free_percpu(queue->worker); + WARN_ON(!__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, NULL); +} diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h new file mode 100644 index 000000000000..fecb559cbdb6 --- /dev/null +++ b/drivers/net/wireguard/queueing.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#ifndef _WG_QUEUEING_H +#define _WG_QUEUEING_H + +#include "peer.h" +#include +#include +#include +#include + +struct wg_device; +struct wg_peer; +struct multicore_worker; +struct crypt_queue; +struct sk_buff; + +/* queueing.c APIs: */ +int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, + bool multicore, unsigned int len); +void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); +struct multicore_worker __percpu * +wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); + +/* receive.c APIs: */ +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb); +void wg_packet_handshake_receive_worker(struct work_struct *work); +/* NAPI poll function: */ +int wg_packet_rx_poll(struct napi_struct *napi, int budget); +/* Workqueue worker: */ +void wg_packet_decrypt_worker(struct work_struct *work); + +/* send.c APIs: */ +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry); +void wg_packet_send_handshake_response(struct wg_peer *peer); +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index); +void wg_packet_send_keepalive(struct wg_peer *peer); +void wg_packet_purge_staged_packets(struct wg_peer *peer); +void wg_packet_send_staged_packets(struct wg_peer *peer); +/* Workqueue workers: */ +void wg_packet_handshake_send_worker(struct work_struct *work); +void wg_packet_tx_worker(struct work_struct *work); +void wg_packet_encrypt_worker(struct work_struct *work); + +enum packet_state { + PACKET_STATE_UNCRYPTED, + PACKET_STATE_CRYPTED, + PACKET_STATE_DEAD +}; + +struct packet_cb { + u64 nonce; + struct noise_keypair *keypair; + atomic_t state; + u32 mtu; + u8 ds; +}; + +#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb)) +#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) + +/* Returns either the correct skb->protocol value, or 0 if invalid. */ +static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) +{ + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct iphdr)) <= + skb_tail_pointer(skb) && + ip_hdr(skb)->version == 4) + return htons(ETH_P_IP); + if (skb_network_header(skb) >= skb->head && + (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= + skb_tail_pointer(skb) && + ipv6_hdr(skb)->version == 6) + return htons(ETH_P_IPV6); + return 0; +} + +static inline void wg_reset_packet(struct sk_buff *skb) +{ + skb_scrub_packet(skb, true); + memset(&skb->headers_start, 0, + offsetof(struct sk_buff, headers_end) - + offsetof(struct sk_buff, headers_start)); + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; + skb->mac_len = 0; + skb->dev = NULL; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; + skb_reset_tc(skb); +#endif + skb->hdr_len = skb_headroom(skb); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_probe_transport_header(skb); + skb_reset_inner_headers(skb); +} + +static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id) +{ + unsigned int cpu = *stored_cpu, cpu_index, i; + + if (unlikely(cpu == nr_cpumask_bits || + !cpumask_test_cpu(cpu, cpu_online_mask))) { + cpu_index = id % cpumask_weight(cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); + for (i = 0; i < cpu_index; ++i) + cpu = cpumask_next(cpu, cpu_online_mask); + *stored_cpu = cpu; + } + return cpu; +} + +/* This function is racy, in the sense that next is unlocked, so it could return + * the same CPU twice. 
A race-free version of this would be to instead store an + * atomic sequence number, do an increment-and-return, and then iterate through + * every possible CPU until we get to that index -- choose_cpu. However that's + * a bit slower, and it doesn't seem like this potential race actually + * introduces any performance loss, so we live with it. + */ +static inline int wg_cpumask_next_online(int *next) +{ + int cpu = *next; + + while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask))) + cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits; + return cpu; +} + +static inline int wg_queue_enqueue_per_device_and_peer( + struct crypt_queue *device_queue, struct crypt_queue *peer_queue, + struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) +{ + int cpu; + + atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED); + /* We first queue this up for the peer ingestion, but the consumer + * will wait for the state to change to CRYPTED or DEAD before. + */ + if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + return -ENOSPC; + /* Then we queue it up in the device queue, which consumes the + * packet as soon as it can. + */ + cpu = wg_cpumask_next_online(next_cpu); + if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb))) + return -EPIPE; + queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work); + return 0; +} + +static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, + struct sk_buff *skb, + enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, + peer->internal_id), + peer->device->packet_crypt_wq, &queue->work); + wg_peer_put(peer); +} + +static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, + enum packet_state state) +{ + /* We take a reference, because as soon as we call atomic_set, the + * peer can be freed from below us. + */ + struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); + + atomic_set_release(&PACKET_CB(skb)->state, state); + napi_schedule(&peer->napi); + wg_peer_put(peer); +} + +#ifdef DEBUG +bool wg_packet_counter_selftest(void); +#endif + +#endif /* _WG_QUEUEING_H */ diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c new file mode 100644 index 000000000000..3fedd1d21f5e --- /dev/null +++ b/drivers/net/wireguard/ratelimiter.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "ratelimiter.h" +#include +#include +#include +#include + +static struct kmem_cache *entry_cache; +static hsiphash_key_t key; +static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock"); +static DEFINE_MUTEX(init_lock); +static u64 init_refcnt; /* Protected by init_lock, hence not atomic. 
*/ +static atomic_t total_entries = ATOMIC_INIT(0); +static unsigned int max_entries, table_size; +static void wg_ratelimiter_gc_entries(struct work_struct *); +static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries); +static struct hlist_head *table_v4; +#if IS_ENABLED(CONFIG_IPV6) +static struct hlist_head *table_v6; +#endif + +struct ratelimiter_entry { + u64 last_time_ns, tokens, ip; + void *net; + spinlock_t lock; + struct hlist_node hash; + struct rcu_head rcu; +}; + +enum { + PACKETS_PER_SECOND = 20, + PACKETS_BURSTABLE = 5, + PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, + TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE +}; + +static void entry_free(struct rcu_head *rcu) +{ + kmem_cache_free(entry_cache, + container_of(rcu, struct ratelimiter_entry, rcu)); + atomic_dec(&total_entries); +} + +static void entry_uninit(struct ratelimiter_entry *entry) +{ + hlist_del_rcu(&entry->hash); + call_rcu(&entry->rcu, entry_free); +} + +/* Calling this function with a NULL work uninits all entries. */ +static void wg_ratelimiter_gc_entries(struct work_struct *work) +{ + const u64 now = ktime_get_coarse_boottime_ns(); + struct ratelimiter_entry *entry; + struct hlist_node *temp; + unsigned int i; + + for (i = 0; i < table_size; ++i) { + spin_lock(&table_lock); + hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#if IS_ENABLED(CONFIG_IPV6) + hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) { + if (unlikely(!work) || + now - entry->last_time_ns > NSEC_PER_SEC) + entry_uninit(entry); + } +#endif + spin_unlock(&table_lock); + if (likely(work)) + cond_resched(); + } + if (likely(work)) + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); +} + +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net) +{ + /* We only take the bottom half of the net pointer, so that we can hash + * 3 words in the end. This way, siphash's len param fits into the final + * u32, and we don't incur an extra round. + */ + const u32 net_word = (unsigned long)net; + struct ratelimiter_entry *entry; + struct hlist_head *bucket; + u64 ip; + + if (skb->protocol == htons(ETH_P_IP)) { + ip = (u64 __force)ip_hdr(skb)->saddr; + bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) & + (table_size - 1)]; + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + /* Only use 64 bits, so as to ratelimit the whole /64. */ + memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip)); + bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) & + (table_size - 1)]; + } +#endif + else + return false; + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, bucket, hash) { + if (entry->net == net && entry->ip == ip) { + u64 now, tokens; + bool ret; + /* Quasi-inspired by nft_limit.c, but this is actually a + * slightly different algorithm. Namely, we incorporate + * the burst as part of the maximum tokens, rather than + * as part of the rate. + */ + spin_lock(&entry->lock); + now = ktime_get_coarse_boottime_ns(); + tokens = min_t(u64, TOKEN_MAX, + entry->tokens + now - + entry->last_time_ns); + entry->last_time_ns = now; + ret = tokens >= PACKET_COST; + entry->tokens = ret ? 
tokens - PACKET_COST : tokens; + spin_unlock(&entry->lock); + rcu_read_unlock(); + return ret; + } + } + rcu_read_unlock(); + + if (atomic_inc_return(&total_entries) > max_entries) + goto err_oom; + + entry = kmem_cache_alloc(entry_cache, GFP_KERNEL); + if (unlikely(!entry)) + goto err_oom; + + entry->net = net; + entry->ip = ip; + INIT_HLIST_NODE(&entry->hash); + spin_lock_init(&entry->lock); + entry->last_time_ns = ktime_get_coarse_boottime_ns(); + entry->tokens = TOKEN_MAX - PACKET_COST; + spin_lock(&table_lock); + hlist_add_head_rcu(&entry->hash, bucket); + spin_unlock(&table_lock); + return true; + +err_oom: + atomic_dec(&total_entries); + return false; +} + +int wg_ratelimiter_init(void) +{ + mutex_lock(&init_lock); + if (++init_refcnt != 1) + goto out; + + entry_cache = KMEM_CACHE(ratelimiter_entry, 0); + if (!entry_cache) + goto err; + + /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting, + * but what it shares in common is that it uses a massive hashtable. So, + * we borrow their wisdom about good table sizes on different systems + * dependent on RAM. This calculation here comes from there. + */ + table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 : + max_t(unsigned long, 16, roundup_pow_of_two( + (totalram_pages() << PAGE_SHIFT) / + (1U << 14) / sizeof(struct hlist_head))); + max_entries = table_size * 8; + + table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + if (unlikely(!table_v4)) + goto err_kmemcache; + +#if IS_ENABLED(CONFIG_IPV6) + table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + if (unlikely(!table_v6)) { + kvfree(table_v4); + goto err_kmemcache; + } +#endif + + queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); + get_random_bytes(&key, sizeof(key)); +out: + mutex_unlock(&init_lock); + return 0; + +err_kmemcache: + kmem_cache_destroy(entry_cache); +err: + --init_refcnt; + mutex_unlock(&init_lock); + return -ENOMEM; +} + +void wg_ratelimiter_uninit(void) +{ + mutex_lock(&init_lock); + if (!init_refcnt || --init_refcnt) + goto out; + + cancel_delayed_work_sync(&gc_work); + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + kvfree(table_v4); +#if IS_ENABLED(CONFIG_IPV6) + kvfree(table_v6); +#endif + kmem_cache_destroy(entry_cache); +out: + mutex_unlock(&init_lock); +} + +#include "selftest/ratelimiter.c" diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h new file mode 100644 index 000000000000..83067f71ea99 --- /dev/null +++ b/drivers/net/wireguard/ratelimiter.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_RATELIMITER_H +#define _WG_RATELIMITER_H + +#include + +int wg_ratelimiter_init(void); +void wg_ratelimiter_uninit(void); +bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net); + +#ifdef DEBUG +bool wg_ratelimiter_selftest(void); +#endif + +#endif /* _WG_RATELIMITER_H */ diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c new file mode 100644 index 000000000000..4a153894cee2 --- /dev/null +++ b/drivers/net/wireguard/receive.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" +#include "device.h" +#include "peer.h" +#include "timers.h" +#include "messages.h" +#include "cookie.h" +#include "socket.h" + +#include +#include +#include +#include + +/* Must be called with bh disabled. 
*/ +static void update_rx_stats(struct wg_peer *peer, size_t len) +{ + struct pcpu_sw_netstats *tstats = + get_cpu_ptr(peer->device->dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + ++tstats->rx_packets; + tstats->rx_bytes += len; + peer->rx_bytes += len; + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); +} + +#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type) + +static size_t validate_header_len(struct sk_buff *skb) +{ + if (unlikely(skb->len < sizeof(struct message_header))) + return 0; + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) && + skb->len >= MESSAGE_MINIMUM_LENGTH) + return sizeof(struct message_data); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && + skb->len == sizeof(struct message_handshake_initiation)) + return sizeof(struct message_handshake_initiation); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && + skb->len == sizeof(struct message_handshake_response)) + return sizeof(struct message_handshake_response); + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && + skb->len == sizeof(struct message_handshake_cookie)) + return sizeof(struct message_handshake_cookie); + return 0; +} + +static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) +{ + size_t data_offset, data_len, header_len; + struct udphdr *udp; + + if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || + skb_transport_header(skb) < skb->head || + (skb_transport_header(skb) + sizeof(struct udphdr)) > + skb_tail_pointer(skb))) + return -EINVAL; /* Bogus IP header */ + udp = udp_hdr(skb); + data_offset = (u8 *)udp - skb->data; + if (unlikely(data_offset > U16_MAX || + data_offset + sizeof(struct udphdr) > skb->len)) + /* Packet has offset at impossible location or isn't big enough + * to have UDP fields. + */ + return -EINVAL; + data_len = ntohs(udp->len); + if (unlikely(data_len < sizeof(struct udphdr) || + data_len > skb->len - data_offset)) + /* UDP packet is reporting too small of a size or lying about + * its size. + */ + return -EINVAL; + data_len -= sizeof(struct udphdr); + data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; + if (unlikely(!pskb_may_pull(skb, + data_offset + sizeof(struct message_header)) || + pskb_trim(skb, data_len + data_offset) < 0)) + return -EINVAL; + skb_pull(skb, data_offset); + if (unlikely(skb->len != data_len)) + /* Final len does not agree with calculated len */ + return -EINVAL; + header_len = validate_header_len(skb); + if (unlikely(!header_len)) + return -EINVAL; + __skb_push(skb, data_offset); + if (unlikely(!pskb_may_pull(skb, data_offset + header_len))) + return -EINVAL; + __skb_pull(skb, data_offset); + return 0; +} + +static void wg_receive_handshake_packet(struct wg_device *wg, + struct sk_buff *skb) +{ + enum cookie_mac_state mac_state; + struct wg_peer *peer = NULL; + /* This is global, so that our load calculation applies to the whole + * system. We don't care about races with it at all. 
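+ * A torn or stale read of last_under_load at worst mis-times
+ * the one-second hysteresis below, which is harmless.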
+ */ + static u64 last_under_load; + bool packet_needs_cookie; + bool under_load; + + if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) { + net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", + wg->dev->name, skb); + wg_cookie_message_consume( + (struct message_handshake_cookie *)skb->data, wg); + return; + } + + under_load = skb_queue_len(&wg->incoming_handshakes) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; + if (under_load) { + last_under_load = ktime_get_coarse_boottime_ns(); + } else if (last_under_load) { + under_load = !wg_birthdate_has_expired(last_under_load, 1); + if (!under_load) + last_under_load = 0; + } + mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb, + under_load); + if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || + (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) { + packet_needs_cookie = false; + } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) { + packet_needs_cookie = true; + } else { + net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", + wg->dev->name, skb); + return; + } + + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): { + struct message_handshake_initiation *message = + (struct message_handshake_initiation *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_initiation(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + wg_packet_send_handshake_response(peer); + break; + } + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): { + struct message_handshake_response *message = + (struct message_handshake_response *)skb->data; + + if (packet_needs_cookie) { + wg_packet_send_handshake_cookie(wg, skb, + message->sender_index); + return; + } + peer = wg_noise_handshake_consume_response(message, wg); + if (unlikely(!peer)) { + net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", + wg->dev->name, skb); + return; + } + wg_socket_set_peer_endpoint_from_skb(peer, skb); + net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n", + wg->dev->name, peer->internal_id, + &peer->endpoint.addr); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_handshake_complete(peer); + /* Calling this function will either send any existing + * packets in the queue and not send a keepalive, which + * is the best case, Or, if there's nothing in the + * queue, it will send a keepalive, in order to give + * immediate confirmation of the session. 
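+ * Either way, the responder will not use the new session until
+ * it receives an authenticated data packet from us, so some
+ * packet has to be sent here.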
+ */ + wg_packet_send_keepalive(peer); + } + break; + } + } + + if (unlikely(!peer)) { + WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n"); + return; + } + + local_bh_disable(); + update_rx_stats(peer, skb->len); + local_bh_enable(); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_peer_put(peer); +} + +void wg_packet_handshake_receive_worker(struct work_struct *work) +{ + struct wg_device *wg = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + wg_receive_handshake_packet(wg, skb); + dev_kfree_skb(skb); + cond_resched(); + } +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send = false; + + if (peer->sent_lastminute_handshake) + return; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && + keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT))) + send = true; + rcu_read_unlock_bh(); + + if (send) { + peer->sent_lastminute_handshake = true; + wg_packet_send_queued_handshake_initiation(peer, false); + } +} + +static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key) +{ + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct sk_buff *trailer; + unsigned int offset; + int num_frags; + + if (unlikely(!key)) + return false; + + if (unlikely(!READ_ONCE(key->is_valid) || + wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) || + key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { + WRITE_ONCE(key->is_valid, false); + return false; + } + + PACKET_CB(skb)->nonce = + le64_to_cpu(((struct message_data *)skb->data)->counter); + + /* We ensure that the network header is part of the packet before we + * call skb_cow_data, so that there's no chance that data is removed + * from the skb, so that later we can extract the original endpoint. + */ + offset = skb->data - skb_network_header(skb); + skb_push(skb, offset); + num_frags = skb_cow_data(skb, 0, &trailer); + offset += sizeof(struct message_data); + skb_pull(skb, offset); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0) + return false; + + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0, + PACKET_CB(skb)->nonce, + key->key)) + return false; + + /* Another ugly situation of pushing and pulling the header so as to + * keep endpoint information intact. 
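+ * (Same reason as the push before skb_cow_data() above: the network
+ * header has to remain inside the skb so that the original endpoint
+ * can later be extracted from it.)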
+ */ + skb_push(skb, offset); + if (pskb_trim(skb, skb->len - noise_encrypted_len(0))) + return false; + skb_pull(skb, offset); + + return true; +} + +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */ +static bool counter_validate(union noise_counter *counter, u64 their_counter) +{ + unsigned long index, index_current, top, i; + bool ret = false; + + spin_lock_bh(&counter->receive.lock); + + if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || + their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < + counter->receive.counter)) + goto out; + + index = their_counter >> ilog2(BITS_PER_LONG); + + if (likely(their_counter > counter->receive.counter)) { + index_current = counter->receive.counter >> ilog2(BITS_PER_LONG); + top = min_t(unsigned long, index - index_current, + COUNTER_BITS_TOTAL / BITS_PER_LONG); + for (i = 1; i <= top; ++i) + counter->receive.backtrack[(i + index_current) & + ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; + counter->receive.counter = their_counter; + } + + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1; + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), + &counter->receive.backtrack[index]); + +out: + spin_unlock_bh(&counter->receive.lock); + return ret; +} + +#include "selftest/counter.c" + +static void wg_packet_consume_data_done(struct wg_peer *peer, + struct sk_buff *skb, + struct endpoint *endpoint) +{ + struct net_device *dev = peer->device->dev; + unsigned int len, len_before_trim; + struct wg_peer *routed_peer; + + wg_socket_set_peer_endpoint(peer, endpoint); + + if (unlikely(wg_noise_received_with_keypair(&peer->keypairs, + PACKET_CB(skb)->keypair))) { + wg_timers_handshake_complete(peer); + wg_packet_send_staged_packets(peer); + } + + keep_key_fresh(peer); + + wg_timers_any_authenticated_packet_received(peer); + wg_timers_any_authenticated_packet_traversal(peer); + + /* A packet with length 0 is a keepalive packet */ + if (unlikely(!skb->len)) { + update_rx_stats(peer, message_data_len(0)); + net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + goto packet_processed; + } + + wg_timers_data_received(peer); + + if (unlikely(skb_network_header(skb) < skb->head)) + goto dishonest_packet_size; + if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && + (ip_hdr(skb)->version == 4 || + (ip_hdr(skb)->version == 6 && + pskb_network_may_pull(skb, sizeof(struct ipv6hdr))))))) + goto dishonest_packet_type; + + skb->dev = dev; + /* We've already verified the Poly1305 auth tag, which means this packet + * was not modified in transit. We can therefore tell the networking + * stack that all checksums of every layer of encapsulation have already + * been checked "by the hardware" and therefore is unnecessary to check + * again in software. 
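+ * (Any bit flip in transit would already have made the Poly1305 tag
+ * check fail during decryption, so the inner checksums cannot catch
+ * anything that authentication has not.)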
+ */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = ~0; /* All levels */ + skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); + if (skb->protocol == htons(ETH_P_IP)) { + len = ntohs(ip_hdr(skb)->tot_len); + if (unlikely(len < sizeof(struct iphdr))) + goto dishonest_packet_size; + if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) + IP_ECN_set_ce(ip_hdr(skb)); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + len = ntohs(ipv6_hdr(skb)->payload_len) + + sizeof(struct ipv6hdr); + if (INET_ECN_is_ce(PACKET_CB(skb)->ds)) + IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + } else { + goto dishonest_packet_type; + } + + if (unlikely(len > skb->len)) + goto dishonest_packet_size; + len_before_trim = skb->len; + if (unlikely(pskb_trim(skb, len))) + goto packet_processed; + + routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips, + skb); + wg_peer_put(routed_peer); /* We don't need the extra reference. */ + + if (unlikely(routed_peer != peer)) + goto dishonest_packet_peer; + + if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) { + ++dev->stats.rx_dropped; + net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, + &peer->endpoint.addr); + } else { + update_rx_stats(peer, message_data_len(len_before_trim)); + } + return; + +dishonest_packet_peer: + net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n", + dev->name, skb, peer->internal_id, + &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_type: + net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_frame_errors; + goto packet_processed; +dishonest_packet_size: + net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n", + dev->name, peer->internal_id, &peer->endpoint.addr); + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + goto packet_processed; +packet_processed: + dev_kfree_skb(skb); +} + +int wg_packet_rx_poll(struct napi_struct *napi, int budget) +{ + struct wg_peer *peer = container_of(napi, struct wg_peer, napi); + struct crypt_queue *queue = &peer->rx_queue; + struct noise_keypair *keypair; + struct endpoint endpoint; + enum packet_state state; + struct sk_buff *skb; + int work_done = 0; + bool free; + + if (unlikely(budget <= 0)) + return 0; + + while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != + PACKET_STATE_UNCRYPTED) { + __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(skb); + keypair = PACKET_CB(skb)->keypair; + free = true; + + if (unlikely(state != PACKET_STATE_CRYPTED)) + goto next; + + if (unlikely(!counter_validate(&keypair->receiving.counter, + PACKET_CB(skb)->nonce))) { + net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n", + peer->device->dev->name, + PACKET_CB(skb)->nonce, + keypair->receiving.counter.receive.counter); + goto next; + } + + if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb))) + goto next; + + wg_reset_packet(skb); + wg_packet_consume_data_done(peer, skb, &endpoint); + free = false; + +next: + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + if (unlikely(free)) + dev_kfree_skb(skb); + + if (++work_done >= budget) + break; + } + + if (work_done < budget) + napi_complete_done(napi, work_done); + + return work_done; +} + +void 
wg_packet_decrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *skb; + + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = likely(decrypt_packet(skb, + &PACKET_CB(skb)->keypair->receiving)) ? + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; + wg_queue_enqueue_per_peer_napi(skb, state); + } +} + +static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) +{ + __le32 idx = ((struct message_data *)skb->data)->key_idx; + struct wg_peer *peer = NULL; + int ret; + + rcu_read_lock_bh(); + PACKET_CB(skb)->keypair = + (struct noise_keypair *)wg_index_hashtable_lookup( + wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx, + &peer); + if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair))) + goto err_keypair; + + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, + &peer->rx_queue, skb, + wg->packet_crypt_wq, + &wg->decrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); + if (likely(!ret || ret == -EPIPE)) { + rcu_read_unlock_bh(); + return; + } +err: + wg_noise_keypair_put(PACKET_CB(skb)->keypair, false); +err_keypair: + rcu_read_unlock_bh(); + wg_peer_put(peer); + dev_kfree_skb(skb); +} + +void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) +{ + if (unlikely(prepare_skb_header(skb, wg) < 0)) + goto err; + switch (SKB_TYPE_LE32(skb)) { + case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): + case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): + case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { + int cpu; + + if (skb_queue_len(&wg->incoming_handshakes) > + MAX_QUEUED_INCOMING_HANDSHAKES || + unlikely(!rng_is_initialized())) { + net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + skb_queue_tail(&wg->incoming_handshakes, skb); + /* Queues up a call to packet_process_queued_handshake_ + * packets(skb): + */ + cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + queue_work_on(cpu, wg->handshake_receive_wq, + &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + break; + } + case cpu_to_le32(MESSAGE_DATA): + PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); + wg_packet_consume_data(wg, skb); + break; + default: + net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", + wg->dev->name, skb); + goto err; + } + return; + +err: + dev_kfree_skb(skb); +} diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c new file mode 100644 index 000000000000..846db14cb046 --- /dev/null +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * This contains some basic static unit tests for the allowedips data structure. + * It also has two additional modes that are disabled and meant to be used by + * folks directly playing with this file. If you define the macro + * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in + * memory, it will be printed out as KERN_DEBUG in a format that can be passed + * to graphviz (the dot command) to visualize it. If you define the macro + * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of + * randomized tests done against a trivial implementation, which may take + * upwards of a half-hour to complete. 
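+ * As an illustrative (untested) workflow, the emitted KERN_DEBUG
+ * lines can be copied out of dmesg into a file and rendered with
+ * graphviz, e.g. `dot -Tsvg trie.dot -o trie.svg`.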
There's no set of users who should be + * enabling these, and the only developers that should go anywhere near these + * nobs are the ones who are reading this comment. + */ + +#ifdef DEBUG + +#include + +static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, + u8 cidr) +{ + swap_endian(dst, src, bits); + memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); + if (cidr) + dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); +} + +static __init void print_node(struct allowedips_node *node, u8 bits) +{ + char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; + char *fmt_declaration = KERN_DEBUG + "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; + char *style = "dotted"; + u8 ip1[16], ip2[16]; + u32 color = 0; + + if (bits == 32) { + fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; + fmt_declaration = KERN_DEBUG + "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; + } else if (bits == 128) { + fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; + fmt_declaration = KERN_DEBUG + "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; + } + if (node->peer) { + hsiphash_key_t key = { { 0 } }; + + memcpy(&key, &node->peer, sizeof(node->peer)); + color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 | + hsiphash_1u32(0xbabecafe, &key) % 200 << 8 | + hsiphash_1u32(0xabad1dea, &key) % 200; + style = "bold"; + } + swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); + printk(fmt_declaration, ip1, node->cidr, style, color); + if (node->bit[0]) { + swap_endian_and_apply_cidr(ip2, + rcu_dereference_raw(node->bit[0])->bits, bits, + node->cidr); + printk(fmt_connection, ip1, node->cidr, ip2, + rcu_dereference_raw(node->bit[0])->cidr); + print_node(rcu_dereference_raw(node->bit[0]), bits); + } + if (node->bit[1]) { + swap_endian_and_apply_cidr(ip2, + rcu_dereference_raw(node->bit[1])->bits, + bits, node->cidr); + printk(fmt_connection, ip1, node->cidr, ip2, + rcu_dereference_raw(node->bit[1])->cidr); + print_node(rcu_dereference_raw(node->bit[1]), bits); + } +} + +static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) +{ + printk(KERN_DEBUG "digraph trie {\n"); + print_node(rcu_dereference_raw(top), bits); + printk(KERN_DEBUG "}\n"); +} + +enum { + NUM_PEERS = 2000, + NUM_RAND_ROUTES = 400, + NUM_MUTATED_ROUTES = 100, + NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30 +}; + +struct horrible_allowedips { + struct hlist_head head; +}; + +struct horrible_allowedips_node { + struct hlist_node table; + union nf_inet_addr ip; + union nf_inet_addr mask; + u8 ip_version; + void *value; +}; + +static __init void horrible_allowedips_init(struct horrible_allowedips *table) +{ + INIT_HLIST_HEAD(&table->head); +} + +static __init void horrible_allowedips_free(struct horrible_allowedips *table) +{ + struct horrible_allowedips_node *node; + struct hlist_node *h; + + hlist_for_each_entry_safe(node, h, &table->head, table) { + hlist_del(&node->table); + kfree(node); + } +} + +static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr) +{ + union nf_inet_addr mask; + + memset(&mask, 0x00, 128 / 8); + memset(&mask, 0xff, cidr / 8); + if (cidr % 32) + mask.all[cidr / 32] = (__force u32)htonl( + (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); + return mask; +} + +static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet) +{ + return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) + + hweight32(subnet.all[2]) + hweight32(subnet.all[3]); +} + +static __init inline void +horrible_mask_self(struct 
horrible_allowedips_node *node) +{ + if (node->ip_version == 4) { + node->ip.ip &= node->mask.ip; + } else if (node->ip_version == 6) { + node->ip.ip6[0] &= node->mask.ip6[0]; + node->ip.ip6[1] &= node->mask.ip6[1]; + node->ip.ip6[2] &= node->mask.ip6[2]; + node->ip.ip6[3] &= node->mask.ip6[3]; + } +} + +static __init inline bool +horrible_match_v4(const struct horrible_allowedips_node *node, + struct in_addr *ip) +{ + return (ip->s_addr & node->mask.ip) == node->ip.ip; +} + +static __init inline bool +horrible_match_v6(const struct horrible_allowedips_node *node, + struct in6_addr *ip) +{ + return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == + node->ip.ip6[0] && + (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == + node->ip.ip6[1] && + (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == + node->ip.ip6[2] && + (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; +} + +static __init void +horrible_insert_ordered(struct horrible_allowedips *table, + struct horrible_allowedips_node *node) +{ + struct horrible_allowedips_node *other = NULL, *where = NULL; + u8 my_cidr = horrible_mask_to_cidr(node->mask); + + hlist_for_each_entry(other, &table->head, table) { + if (!memcmp(&other->mask, &node->mask, + sizeof(union nf_inet_addr)) && + !memcmp(&other->ip, &node->ip, + sizeof(union nf_inet_addr)) && + other->ip_version == node->ip_version) { + other->value = node->value; + kfree(node); + return; + } + where = other; + if (horrible_mask_to_cidr(other->mask) <= my_cidr) + break; + } + if (!other && !where) + hlist_add_head(&node->table, &table->head); + else if (!other) + hlist_add_behind(&node->table, &where->table); + else + hlist_add_before(&node->table, &where->table); +} + +static __init int +horrible_allowedips_insert_v4(struct horrible_allowedips *table, + struct in_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), + GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 4; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init int +horrible_allowedips_insert_v6(struct horrible_allowedips *table, + struct in6_addr *ip, u8 cidr, void *value) +{ + struct horrible_allowedips_node *node = kzalloc(sizeof(*node), + GFP_KERNEL); + + if (unlikely(!node)) + return -ENOMEM; + node->ip.in6 = *ip; + node->mask = horrible_cidr_to_mask(cidr); + node->ip_version = 6; + node->value = value; + horrible_mask_self(node); + horrible_insert_ordered(table, node); + return 0; +} + +static __init void * +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, + struct in_addr *ip) +{ + struct horrible_allowedips_node *node; + void *ret = NULL; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version != 4) + continue; + if (horrible_match_v4(node, ip)) { + ret = node->value; + break; + } + } + return ret; +} + +static __init void * +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, + struct in6_addr *ip) +{ + struct horrible_allowedips_node *node; + void *ret = NULL; + + hlist_for_each_entry(node, &table->head, table) { + if (node->ip_version != 6) + continue; + if (horrible_match_v6(node, ip)) { + ret = node->value; + break; + } + } + return ret; +} + +static __init bool randomized_test(void) +{ + unsigned int i, j, k, mutate_amount, cidr; + u8 ip[16], mutate_mask[16], mutated[16]; + struct wg_peer **peers, *peer; + struct horrible_allowedips h; + 
DEFINE_MUTEX(mutex); + struct allowedips t; + bool ret = false; + + mutex_init(&mutex); + + wg_allowedips_init(&t); + horrible_allowedips_init(&h); + + peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL); + if (unlikely(!peers)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + for (i = 0; i < NUM_PEERS; ++i) { + peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL); + if (unlikely(!peers[i])) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free; + } + kref_init(&peers[i]->refcount); + } + + mutex_lock(&mutex); + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 4); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 4); + prandom_bytes(mutate_mask, 4); + mutate_amount = prandom_u32_max(32); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(32) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v4(&t, + (struct in_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v4(&h, + (struct in_addr *)mutated, cidr, peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + for (i = 0; i < NUM_RAND_ROUTES; ++i) { + prandom_bytes(ip, 16); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr, + peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip, + cidr, peer) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + for (j = 0; j < NUM_MUTATED_ROUTES; ++j) { + memcpy(mutated, ip, 16); + prandom_bytes(mutate_mask, 16); + mutate_amount = prandom_u32_max(128); + for (k = 0; k < mutate_amount / 8; ++k) + mutate_mask[k] = 0xff; + mutate_mask[k] = 0xff + << ((8 - (mutate_amount % 8)) % 8); + for (; k < 4; ++k) + mutate_mask[k] = 0; + for (k = 0; k < 4; ++k) + mutated[k] = (mutated[k] & mutate_mask[k]) | + (~mutate_mask[k] & + prandom_u32_max(256)); + cidr = prandom_u32_max(128) + 1; + peer = peers[prandom_u32_max(NUM_PEERS)]; + if (wg_allowedips_insert_v6(&t, + (struct in6_addr *)mutated, + cidr, peer, &mutex) < 0) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + if (horrible_allowedips_insert_v6( + &h, (struct in6_addr *)mutated, cidr, + peer)) { + pr_err("allowedips random self-test malloc: FAIL\n"); + goto free_locked; + } + } + } + + mutex_unlock(&mutex); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 4); + if (lookup(t.root4, 32, ip) != + horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { + pr_err("allowedips random self-test: 
FAIL\n"); + goto free; + } + } + + for (i = 0; i < NUM_QUERIES; ++i) { + prandom_bytes(ip, 16); + if (lookup(t.root6, 128, ip) != + horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { + pr_err("allowedips random self-test: FAIL\n"); + goto free; + } + } + ret = true; + +free: + mutex_lock(&mutex); +free_locked: + wg_allowedips_free(&t, &mutex); + mutex_unlock(&mutex); + horrible_allowedips_free(&h); + if (peers) { + for (i = 0; i < NUM_PEERS; ++i) + kfree(peers[i]); + } + kfree(peers); + return ret; +} + +static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d) +{ + static struct in_addr ip; + u8 *split = (u8 *)&ip; + + split[0] = a; + split[1] = b; + split[2] = c; + split[3] = d; + return &ip; +} + +static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d) +{ + static struct in6_addr ip; + __be32 *split = (__be32 *)&ip; + + split[0] = cpu_to_be32(a); + split[1] = cpu_to_be32(b); + split[2] = cpu_to_be32(c); + split[3] = cpu_to_be32(d); + return &ip; +} + +static __init struct wg_peer *init_peer(void) +{ + struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL); + + if (!peer) + return NULL; + kref_init(&peer->refcount); + INIT_LIST_HEAD(&peer->allowedips_list); + return peer; +} + +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \ + wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \ + cidr, mem, &mutex) + +#define maybe_fail() do { \ + ++i; \ + if (!_s) { \ + pr_info("allowedips self-test %zu: FAIL\n", i); \ + success = false; \ + } \ + } while (0) + +#define test(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) == (mem); \ + maybe_fail(); \ + } while (0) + +#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = lookup(t.root##version, (version) == 4 ? 
32 : 128, \ + ip##version(ipa, ipb, ipc, ipd)) != (mem); \ + maybe_fail(); \ + } while (0) + +#define test_boolean(cond) do { \ + bool _s = (cond); \ + maybe_fail(); \ + } while (0) + +bool __init wg_allowedips_selftest(void) +{ + bool found_a = false, found_b = false, found_c = false, found_d = false, + found_e = false, found_other = false; + struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(), + *d = init_peer(), *e = init_peer(), *f = init_peer(), + *g = init_peer(), *h = init_peer(); + struct allowedips_node *iter_node; + bool success = false; + struct allowedips t; + DEFINE_MUTEX(mutex); + struct in6_addr ip; + size_t i = 0, count = 0; + __be64 part; + + mutex_init(&mutex); + mutex_lock(&mutex); + wg_allowedips_init(&t); + + if (!a || !b || !c || !d || !e || !f || !g || !h) { + pr_err("allowedips self-test malloc: FAIL\n"); + goto free; + } + + insert(4, a, 192, 168, 4, 0, 24); + insert(4, b, 192, 168, 4, 4, 32); + insert(4, c, 192, 168, 0, 0, 16); + insert(4, d, 192, 95, 5, 64, 27); + /* replaces previous entry, and maskself is required */ + insert(4, c, 192, 95, 5, 65, 27); + insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); + insert(4, e, 0, 0, 0, 0, 0); + insert(6, e, 0, 0, 0, 0, 0); + /* replaces previous entry */ + insert(6, f, 0, 0, 0, 0, 0); + insert(6, g, 0x24046800, 0, 0, 0, 32); + /* maskself is required */ + insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); + insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); + insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128); + insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98); + insert(4, g, 64, 15, 112, 0, 20); + /* maskself is required */ + insert(4, h, 64, 15, 123, 211, 25); + insert(4, a, 10, 0, 0, 0, 25); + insert(4, b, 10, 0, 0, 128, 25); + insert(4, a, 10, 1, 0, 0, 30); + insert(4, b, 10, 1, 0, 4, 30); + insert(4, c, 10, 1, 0, 8, 29); + insert(4, d, 10, 1, 0, 16, 29); + + if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) { + print_tree(t.root4, 32); + print_tree(t.root6, 128); + } + + success = true; + + test(4, a, 192, 168, 4, 20); + test(4, a, 192, 168, 4, 0); + test(4, b, 192, 168, 4, 4); + test(4, c, 192, 168, 200, 182); + test(4, c, 192, 95, 5, 68); + test(4, e, 192, 95, 5, 96); + test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); + test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); + test(6, f, 0x26075300, 0x60006b01, 0, 0); + test(6, g, 0x24046800, 0x40040806, 0, 0x1006); + test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0, 0); + test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); + test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); + test(4, g, 64, 15, 116, 26); + test(4, g, 64, 15, 127, 3); + test(4, g, 64, 15, 123, 1); + test(4, h, 64, 15, 123, 128); + test(4, h, 64, 15, 123, 129); + test(4, a, 10, 0, 0, 52); + test(4, b, 10, 0, 0, 220); + test(4, a, 10, 1, 0, 2); + test(4, b, 10, 1, 0, 6); + test(4, c, 10, 1, 0, 10); + test(4, d, 10, 1, 0, 20); + + insert(4, a, 1, 0, 0, 0, 32); + insert(4, a, 64, 0, 0, 0, 32); + insert(4, a, 128, 0, 0, 0, 32); + insert(4, a, 192, 0, 0, 0, 32); + insert(4, a, 255, 0, 0, 0, 32); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 1, 0, 0, 0); + test_negative(4, a, 64, 0, 0, 0); + test_negative(4, a, 128, 0, 0, 0); + test_negative(4, a, 
192, 0, 0, 0); + test_negative(4, a, 255, 0, 0, 0); + + wg_allowedips_free(&t, &mutex); + wg_allowedips_init(&t); + insert(4, a, 192, 168, 0, 0, 16); + insert(4, a, 192, 168, 0, 0, 24); + wg_allowedips_remove_by_peer(&t, a, &mutex); + test_negative(4, a, 192, 168, 0, 1); + + /* These will hit the WARN_ON(len >= 128) in free_node if something + * goes wrong. + */ + for (i = 0; i < 128; ++i) { + part = cpu_to_be64(~(1LLU << (i % 64))); + memset(&ip, 0xff, 16); + memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); + wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex); + } + + wg_allowedips_free(&t, &mutex); + + wg_allowedips_init(&t); + insert(4, a, 192, 95, 5, 93, 27); + insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(4, a, 10, 1, 0, 20, 29); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83); + insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21); + list_for_each_entry(iter_node, &a->allowedips_list, peer_list) { + u8 cidr, ip[16] __aligned(__alignof(u64)); + int family = wg_allowedips_read_node(iter_node, ip, &cidr); + + count++; + + if (cidr == 27 && family == AF_INET && + !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr))) + found_a = true; + else if (cidr == 128 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543), + sizeof(struct in6_addr))) + found_b = true; + else if (cidr == 29 && family == AF_INET && + !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr))) + found_c = true; + else if (cidr == 83 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0), + sizeof(struct in6_addr))) + found_d = true; + else if (cidr == 21 && family == AF_INET6 && + !memcmp(ip, ip6(0x26075000, 0, 0, 0), + sizeof(struct in6_addr))) + found_e = true; + else + found_other = true; + } + test_boolean(count == 5); + test_boolean(found_a); + test_boolean(found_b); + test_boolean(found_c); + test_boolean(found_d); + test_boolean(found_e); + test_boolean(!found_other); + + if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success) + success = randomized_test(); + + if (success) + pr_info("allowedips self-tests: pass\n"); + +free: + wg_allowedips_free(&t, &mutex); + kfree(a); + kfree(b); + kfree(c); + kfree(d); + kfree(e); + kfree(f); + kfree(g); + kfree(h); + mutex_unlock(&mutex); + + return success; +} + +#undef test_negative +#undef test +#undef remove +#undef insert +#undef init_peer + +#endif diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c new file mode 100644 index 000000000000..f4fbb9072ed7 --- /dev/null +++ b/drivers/net/wireguard/selftest/counter.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
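+ *
+ * This contains some basic static unit tests for the RFC6479-style
+ * replay-window counter used by the receive path.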
+ */ + +#ifdef DEBUG +bool __init wg_packet_counter_selftest(void) +{ + unsigned int test_num = 0, i; + union noise_counter counter; + bool success = true; + +#define T_INIT do { \ + memset(&counter, 0, sizeof(union noise_counter)); \ + spin_lock_init(&counter.receive.lock); \ + } while (0) +#define T_LIM (COUNTER_WINDOW_SIZE + 1) +#define T(n, v) do { \ + ++test_num; \ + if (counter_validate(&counter, n) != (v)) { \ + pr_err("nonce counter self-test %u: FAIL\n", \ + test_num); \ + success = false; \ + } \ + } while (0) + + T_INIT; + /* 1 */ T(0, true); + /* 2 */ T(1, true); + /* 3 */ T(1, false); + /* 4 */ T(9, true); + /* 5 */ T(8, true); + /* 6 */ T(7, true); + /* 7 */ T(7, false); + /* 8 */ T(T_LIM, true); + /* 9 */ T(T_LIM - 1, true); + /* 10 */ T(T_LIM - 1, false); + /* 11 */ T(T_LIM - 2, true); + /* 12 */ T(2, true); + /* 13 */ T(2, false); + /* 14 */ T(T_LIM + 16, true); + /* 15 */ T(3, false); + /* 16 */ T(T_LIM + 16, false); + /* 17 */ T(T_LIM * 4, true); + /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true); + /* 19 */ T(10, false); + /* 20 */ T(T_LIM * 4 - T_LIM, false); + /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false); + /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true); + /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false); + /* 24 */ T(0, false); + /* 25 */ T(REJECT_AFTER_MESSAGES, false); + /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true); + /* 27 */ T(REJECT_AFTER_MESSAGES, false); + /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false); + /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true); + /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false); + /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false); + /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false); + /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true); + /* 34 */ T(0, false); + + T_INIT; + for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i) + T(i, true); + T(0, true); + T(0, false); + + T_INIT; + for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i) + T(i, true); + T(1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;) + T(i, true); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;) + T(i, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(COUNTER_WINDOW_SIZE + 1, true); + T(0, false); + + T_INIT; + for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;) + T(i, true); + T(0, true); + T(COUNTER_WINDOW_SIZE + 1, true); + +#undef T +#undef T_LIM +#undef T_INIT + + if (success) + pr_info("nonce counter self-tests: pass\n"); + return success; +} +#endif diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c new file mode 100644 index 000000000000..bcd6462e4540 --- /dev/null +++ b/drivers/net/wireguard/selftest/ratelimiter.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifdef DEBUG + +#include + +static const struct { + bool result; + unsigned int msec_to_sleep_before; +} expected_results[] __initconst = { + [0 ... 
PACKETS_BURSTABLE - 1] = { true, 0 }, + [PACKETS_BURSTABLE] = { false, 0 }, + [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, + [PACKETS_BURSTABLE + 2] = { false, 0 }, + [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, + [PACKETS_BURSTABLE + 4] = { true, 0 }, + [PACKETS_BURSTABLE + 5] = { false, 0 } +}; + +static __init unsigned int maximum_jiffies_at_index(int index) +{ + unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; + int i; + + for (i = 0; i <= index; ++i) + total_msecs += expected_results[i].msec_to_sleep_before; + return msecs_to_jiffies(total_msecs); +} + +static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, + struct sk_buff *skb6, struct ipv6hdr *hdr6, + int *test) +{ + unsigned long loop_start_time; + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + loop_start_time = jiffies; + + for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { + if (expected_results[i].msec_to_sleep_before) + msleep(expected_results[i].msec_to_sleep_before); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb4, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb4, &init_net)) + return -EXFULL; + ++(*test); + + hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1); + +#if IS_ENABLED(CONFIG_IPV6) + hdr6->saddr.in6_u.u6_addr32[2] = htonl(i); + hdr6->saddr.in6_u.u6_addr32[3] = htonl(i); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (wg_ratelimiter_allow(skb6, &init_net) != + expected_results[i].result) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1); + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; + if (!wg_ratelimiter_allow(skb6, &init_net)) + return -EXFULL; + ++(*test); + + hdr6->saddr.in6_u.u6_addr32[0] = + htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) + return -ETIMEDOUT; +#endif + } + return 0; +} + +static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4, + int *test) +{ + int i; + + wg_ratelimiter_gc_entries(NULL); + rcu_barrier(); + + if (atomic_read(&total_entries)) + return -EXFULL; + ++(*test); + + for (i = 0; i <= max_entries; ++i) { + hdr4->saddr = htonl(i); + if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries)) + return -EXFULL; + ++(*test); + } + return 0; +} + +bool __init wg_ratelimiter_selftest(void) +{ + enum { TRIALS_BEFORE_GIVING_UP = 5000 }; + bool success = false; + int test = 0, trials; + struct sk_buff *skb4, *skb6; + struct iphdr *hdr4; + struct ipv6hdr *hdr6; + + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) + return true; + + BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); + + if (wg_ratelimiter_init()) + goto out; + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + goto out; + } + ++test; + if (wg_ratelimiter_init()) { + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + goto out; + } + ++test; + + skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL); + if (unlikely(!skb4)) + goto err_nofree; + skb4->protocol = htons(ETH_P_IP); + hdr4 = (struct iphdr *)skb_put(skb4, 
sizeof(*hdr4)); + hdr4->saddr = htonl(8182); + skb_reset_network_header(skb4); + ++test; + +#if IS_ENABLED(CONFIG_IPV6) + skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL); + if (unlikely(!skb6)) { + kfree_skb(skb4); + goto err_nofree; + } + skb6->protocol = htons(ETH_P_IPV6); + hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6)); + hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212); + hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188); + skb_reset_network_header(skb6); + ++test; +#endif + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0, ret; + + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); + if (ret == -ETIMEDOUT) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(500); + continue; + } else if (ret < 0) { + test += test_count; + goto err; + } else { + test += test_count; + break; + } + } + + for (trials = TRIALS_BEFORE_GIVING_UP;;) { + int test_count = 0; + + if (capacity_test(skb4, hdr4, &test_count) < 0) { + if (!trials--) { + test += test_count; + goto err; + } + msleep(50); + continue; + } + test += test_count; + break; + } + + success = true; + +err: + kfree_skb(skb4); +#if IS_ENABLED(CONFIG_IPV6) + kfree_skb(skb6); +#endif +err_nofree: + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + wg_ratelimiter_uninit(); + /* Uninit one extra time to check underflow detection. */ + wg_ratelimiter_uninit(); +out: + if (success) + pr_info("ratelimiter self-tests: pass\n"); + else + pr_err("ratelimiter self-test %d: FAIL\n", test); + + return success; +} +#endif diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c new file mode 100644 index 000000000000..7348c10cbae3 --- /dev/null +++ b/drivers/net/wireguard/send.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "queueing.h" +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "messages.h" +#include "cookie.h" + +#include +#include +#include +#include +#include +#include + +static void wg_packet_send_handshake_initiation(struct wg_peer *peer) +{ + struct message_handshake_initiation packet; + + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT)) + return; /* This function is rate limited. 
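+ * (i.e. at most one initiation is sent per
+ * REKEY_TIMEOUT interval to a given peer.)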
*/ + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet), + HANDSHAKE_DSCP); + wg_timers_handshake_initiated(peer); + } +} + +void wg_packet_handshake_send_worker(struct work_struct *work) +{ + struct wg_peer *peer = container_of(work, struct wg_peer, + transmit_handshake_work); + + wg_packet_send_handshake_initiation(peer); + wg_peer_put(peer); +} + +void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer, + bool is_retry) +{ + if (!is_retry) + peer->timer_handshake_attempts = 0; + + rcu_read_lock_bh(); + /* We check last_sent_handshake here in addition to the actual function + * we're queueing up, so that we don't queue things if not strictly + * necessary: + */ + if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake), + REKEY_TIMEOUT) || + unlikely(READ_ONCE(peer->is_dead))) + goto out; + + wg_peer_get(peer); + /* Queues up calling packet_send_queued_handshakes(peer), where we do a + * peer_put(peer) after: + */ + if (!queue_work(peer->device->handshake_send_wq, + &peer->transmit_handshake_work)) + /* If the work was already queued, we want to drop the + * extra reference: + */ + wg_peer_put(peer); +out: + rcu_read_unlock_bh(); +} + +void wg_packet_send_handshake_response(struct wg_peer *peer) +{ + struct message_handshake_response packet; + + atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns()); + net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + + if (wg_noise_handshake_create_response(&packet, &peer->handshake)) { + wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + if (wg_noise_handshake_begin_session(&peer->handshake, + &peer->keypairs)) { + wg_timers_session_derived(peer); + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + atomic64_set(&peer->last_sent_handshake, + ktime_get_coarse_boottime_ns()); + wg_socket_send_buffer_to_peer(peer, &packet, + sizeof(packet), + HANDSHAKE_DSCP); + } + } +} + +void wg_packet_send_handshake_cookie(struct wg_device *wg, + struct sk_buff *initiating_skb, + __le32 sender_index) +{ + struct message_handshake_cookie packet; + + net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", + wg->dev->name, initiating_skb); + wg_cookie_message_create(&packet, initiating_skb, sender_index, + &wg->cookie_checker); + wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, + sizeof(packet)); +} + +static void keep_key_fresh(struct wg_peer *peer) +{ + struct noise_keypair *keypair; + bool send = false; + + rcu_read_lock_bh(); + keypair = rcu_dereference_bh(peer->keypairs.current_keypair); + if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) && + (unlikely(atomic64_read(&keypair->sending.counter.counter) > + REKEY_AFTER_MESSAGES) || + (keypair->i_am_the_initiator && + unlikely(wg_birthdate_has_expired(keypair->sending.birthdate, + 
REKEY_AFTER_TIME))))) + send = true; + rcu_read_unlock_bh(); + + if (send) + wg_packet_send_queued_handshake_initiation(peer, false); +} + +static unsigned int calculate_skb_padding(struct sk_buff *skb) +{ + unsigned int padded_size, last_unit = skb->len; + + if (unlikely(!PACKET_CB(skb)->mtu)) + return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit; + + /* We do this modulo business with the MTU, just in case the networking + * layer gives us a packet that's bigger than the MTU. In that case, we + * wouldn't want the final subtraction to overflow in the case of the + * padded_size being clamped. Fortunately, that's very rarely the case, + * so we optimize for that not happening. + */ + if (unlikely(last_unit > PACKET_CB(skb)->mtu)) + last_unit %= PACKET_CB(skb)->mtu; + + padded_size = min(PACKET_CB(skb)->mtu, + ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE)); + return padded_size - last_unit; +} + +static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair) +{ + unsigned int padding_len, plaintext_len, trailer_len; + struct scatterlist sg[MAX_SKB_FRAGS + 8]; + struct message_data *header; + struct sk_buff *trailer; + int num_frags; + + /* Calculate lengths. */ + padding_len = calculate_skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); + plaintext_len = skb->len + padding_len; + + /* Expand data section to have room for padding and auth tag. */ + num_frags = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) + return false; + + /* Set the padding to zeros, and make sure it and the auth tag are part + * of the skb. + */ + memset(skb_tail_pointer(trailer), 0, padding_len); + + /* Expand head section to have room for our header and the network + * stack's headers. + */ + if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0)) + return false; + + /* Finalize checksum calculation for the inner packet, if required. */ + if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_help(skb))) + return false; + + /* Only after checksumming can we safely add on the padding at the end + * and the header. 
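+ * (skb_checksum_help() computes the inner checksum over the packet
+ * as it currently stands, so the padding and the outer header must
+ * not yet be part of it.)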
+ */ + skb_set_inner_network_header(skb, 0); + header = (struct message_data *)skb_push(skb, sizeof(*header)); + header->header.type = cpu_to_le32(MESSAGE_DATA); + header->key_idx = keypair->remote_index; + header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); + pskb_put(skb, trailer, trailer_len); + + /* Now we can encrypt the scattergather segments */ + sg_init_table(sg, num_frags); + if (skb_to_sgvec(skb, sg, sizeof(struct message_data), + noise_encrypted_len(plaintext_len)) <= 0) + return false; + return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0, + PACKET_CB(skb)->nonce, + keypair->sending.key); +} + +void wg_packet_send_keepalive(struct wg_peer *peer) +{ + struct sk_buff *skb; + + if (skb_queue_empty(&peer->staged_packet_queue)) { + skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, + GFP_ATOMIC); + if (unlikely(!skb)) + return; + skb_reserve(skb, DATA_PACKET_HEAD_ROOM); + skb->dev = peer->device->dev; + PACKET_CB(skb)->mtu = skb->dev->mtu; + skb_queue_tail(&peer->staged_packet_queue, skb); + net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n", + peer->device->dev->name, peer->internal_id, + &peer->endpoint.addr); + } + + wg_packet_send_staged_packets(peer); +} + +static void wg_packet_create_data_done(struct sk_buff *first, + struct wg_peer *peer) +{ + struct sk_buff *skb, *next; + bool is_keepalive, data_sent = false; + + wg_timers_any_authenticated_packet_traversal(peer); + wg_timers_any_authenticated_packet_sent(peer); + skb_list_walk_safe(first, skb, next) { + is_keepalive = skb->len == message_data_len(0); + if (likely(!wg_socket_send_skb_to_peer(peer, skb, + PACKET_CB(skb)->ds) && !is_keepalive)) + data_sent = true; + } + + if (likely(data_sent)) + wg_timers_data_sent(peer); + + keep_key_fresh(peer); +} + +void wg_packet_tx_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct crypt_queue, + work); + struct noise_keypair *keypair; + enum packet_state state; + struct sk_buff *first; + struct wg_peer *peer; + + while ((first = __ptr_ring_peek(&queue->ring)) != NULL && + (state = atomic_read_acquire(&PACKET_CB(first)->state)) != + PACKET_STATE_UNCRYPTED) { + __ptr_ring_discard_one(&queue->ring); + peer = PACKET_PEER(first); + keypair = PACKET_CB(first)->keypair; + + if (likely(state == PACKET_STATE_CRYPTED)) + wg_packet_create_data_done(first, peer); + else + kfree_skb_list(first); + + wg_noise_keypair_put(keypair, false); + wg_peer_put(peer); + } +} + +void wg_packet_encrypt_worker(struct work_struct *work) +{ + struct crypt_queue *queue = container_of(work, struct multicore_worker, + work)->ptr; + struct sk_buff *first, *skb, *next; + + while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) { + enum packet_state state = PACKET_STATE_CRYPTED; + + skb_list_walk_safe(first, skb, next) { + if (likely(encrypt_packet(skb, + PACKET_CB(first)->keypair))) { + wg_reset_packet(skb); + } else { + state = PACKET_STATE_DEAD; + break; + } + } + wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, + state); + + } +} + +static void wg_packet_create_data(struct sk_buff *first) +{ + struct wg_peer *peer = PACKET_PEER(first); + struct wg_device *wg = peer->device; + int ret = -EINVAL; + + rcu_read_lock_bh(); + if (unlikely(READ_ONCE(peer->is_dead))) + goto err; + + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, + &peer->tx_queue, first, + wg->packet_crypt_wq, + &wg->encrypt_queue.last_cpu); + if (unlikely(ret == -EPIPE)) + wg_queue_enqueue_per_peer(&peer->tx_queue, first, 
+ PACKET_STATE_DEAD); +err: + rcu_read_unlock_bh(); + if (likely(!ret || ret == -EPIPE)) + return; + wg_noise_keypair_put(PACKET_CB(first)->keypair, false); + wg_peer_put(peer); + kfree_skb_list(first); +} + +void wg_packet_purge_staged_packets(struct wg_peer *peer) +{ + spin_lock_bh(&peer->staged_packet_queue.lock); + peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen; + __skb_queue_purge(&peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); +} + +void wg_packet_send_staged_packets(struct wg_peer *peer) +{ + struct noise_symmetric_key *key; + struct noise_keypair *keypair; + struct sk_buff_head packets; + struct sk_buff *skb; + + /* Steal the current queue into our local one. */ + __skb_queue_head_init(&packets); + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice_init(&peer->staged_packet_queue, &packets); + spin_unlock_bh(&peer->staged_packet_queue.lock); + if (unlikely(skb_queue_empty(&packets))) + return; + + /* First we make sure we have a valid reference to a valid key. */ + rcu_read_lock_bh(); + keypair = wg_noise_keypair_get( + rcu_dereference_bh(peer->keypairs.current_keypair)); + rcu_read_unlock_bh(); + if (unlikely(!keypair)) + goto out_nokey; + key = &keypair->sending; + if (unlikely(!READ_ONCE(key->is_valid))) + goto out_nokey; + if (unlikely(wg_birthdate_has_expired(key->birthdate, + REJECT_AFTER_TIME))) + goto out_invalid; + + /* After we know we have a somewhat valid key, we now try to assign + * nonces to all of the packets in the queue. If we can't assign nonces + * for all of them, we just consider it a failure and wait for the next + * handshake. + */ + skb_queue_walk(&packets, skb) { + /* 0 for no outer TOS: no leak. TODO: at some later point, we + * might consider using flowi->tos as outer instead. + */ + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb); + PACKET_CB(skb)->nonce = + atomic64_inc_return(&key->counter.counter) - 1; + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES)) + goto out_invalid; + } + + packets.prev->next = NULL; + wg_peer_get(keypair->entry.peer); + PACKET_CB(packets.next)->keypair = keypair; + wg_packet_create_data(packets.next); + return; + +out_invalid: + WRITE_ONCE(key->is_valid, false); +out_nokey: + wg_noise_keypair_put(keypair, false); + + /* We orphan the packets if we're waiting on a handshake, so that they + * don't block a socket's pool. + */ + skb_queue_walk(&packets, skb) + skb_orphan(skb); + /* Then we put them back on the top of the queue. We're not too + * concerned about accidentally getting things a little out of order if + * packets are being added really fast, because this queue is for before + * packets can even be sent and it's small anyway. + */ + spin_lock_bh(&peer->staged_packet_queue.lock); + skb_queue_splice(&packets, &peer->staged_packet_queue); + spin_unlock_bh(&peer->staged_packet_queue.lock); + + /* If we're exiting because there's something wrong with the key, it + * means we should initiate a new handshake. + */ + wg_packet_send_queued_handshake_initiation(peer, false); +} diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c new file mode 100644 index 000000000000..b0d6541582d3 --- /dev/null +++ b/drivers/net/wireguard/socket.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
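+ *
+ * This handles the UDP transport sockets: send4() and send6() route
+ * and transmit encapsulated packets, and wg_receive() hands incoming
+ * datagrams to wg_packet_receive().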
+ */ + +#include "device.h" +#include "peer.h" +#include "socket.h" +#include "queueing.h" +#include "messages.h" + +#include +#include +#include +#include +#include +#include +#include + +static int send4(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ + struct flowi4 fl = { + .saddr = endpoint->src4.s_addr, + .daddr = endpoint->addr4.sin_addr.s_addr, + .fl4_dport = endpoint->addr4.sin_port, + .flowi4_mark = wg->fwmark, + .flowi4_proto = IPPROTO_UDP + }; + struct rtable *rt = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock4); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl4_sport = inet_sk(sock)->inet_sport; + + if (cache) + rt = dst_cache_get_ip4(cache, &fl.saddr); + + if (!rt) { + security_sk_classify_flow(sock, flowi4_to_flowi(&fl)); + if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0, + fl.saddr, RT_SCOPE_HOST))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + } + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) && + PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && + rt->dst.dev->ifindex != endpoint->src_if4)))) { + endpoint->src4.s_addr = 0; + *(__force __be32 *)&endpoint->src_if4 = 0; + fl.saddr = 0; + if (cache) + dst_cache_reset(cache); + if (!IS_ERR(rt)) + ip_rt_put(rt); + rt = ip_route_output_flow(sock_net(sock), &fl, sock); + } + if (unlikely(IS_ERR(rt))) { + ret = PTR_ERR(rt); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(rt->dst.dev == skb->dev)) { + ip_rt_put(rt); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip4(cache, &rt->dst, fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds, + ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport, + fl.fl4_dport, false, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +} + +static int send6(struct wg_device *wg, struct sk_buff *skb, + struct endpoint *endpoint, u8 ds, struct dst_cache *cache) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl = { + .saddr = endpoint->src6, + .daddr = endpoint->addr6.sin6_addr, + .fl6_dport = endpoint->addr6.sin6_port, + .flowi6_mark = wg->fwmark, + .flowi6_oif = endpoint->addr6.sin6_scope_id, + .flowi6_proto = IPPROTO_UDP + /* TODO: addr->sin6_flowinfo */ + }; + struct dst_entry *dst = NULL; + struct sock *sock; + int ret = 0; + + skb_mark_not_on_list(skb); + skb->dev = wg->dev; + skb->mark = wg->fwmark; + + rcu_read_lock_bh(); + sock = rcu_dereference_bh(wg->sock6); + + if (unlikely(!sock)) { + ret = -ENONET; + goto err; + } + + fl.fl6_sport = inet_sk(sock)->inet_sport; + + if (cache) + dst = dst_cache_get_ip6(cache, &fl.saddr); + + if (!dst) { + security_sk_classify_flow(sock, flowi6_to_flowi(&fl)); + if (unlikely(!ipv6_addr_any(&fl.saddr) && + !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) { + endpoint->src6 = fl.saddr = in6addr_any; + if (cache) + dst_cache_reset(cache); + } + dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl, + NULL); + if (unlikely(IS_ERR(dst))) { + ret = PTR_ERR(dst); + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", + 
wg->dev->name, &endpoint->addr, ret); + goto err; + } else if (unlikely(dst->dev == skb->dev)) { + dst_release(dst); + ret = -ELOOP; + net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", + wg->dev->name, &endpoint->addr); + goto err; + } + if (cache) + dst_cache_set_ip6(cache, dst, &fl.saddr); + } + + skb->ignore_df = 1; + udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds, + ip6_dst_hoplimit(dst), 0, fl.fl6_sport, + fl.fl6_dport, false); + goto out; + +err: + kfree_skb(skb); +out: + rcu_read_unlock_bh(); + return ret; +#else + return -EAFNOSUPPORT; +#endif +} + +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds) +{ + size_t skb_len = skb->len; + int ret = -EAFNOSUPPORT; + + read_lock_bh(&peer->endpoint_lock); + if (peer->endpoint.addr.sa_family == AF_INET) + ret = send4(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else if (peer->endpoint.addr.sa_family == AF_INET6) + ret = send6(peer->device, skb, &peer->endpoint, ds, + &peer->endpoint_cache); + else + dev_kfree_skb(skb); + if (likely(!ret)) + peer->tx_bytes += skb_len; + read_unlock_bh(&peer->endpoint_lock); + + return ret; +} + +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer, + size_t len, u8 ds) +{ + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + + if (unlikely(!skb)) + return -ENOMEM; + + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + return wg_socket_send_skb_to_peer(peer, skb, ds); +} + +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, void *buffer, + size_t len) +{ + int ret = 0; + struct sk_buff *skb; + struct endpoint endpoint; + + if (unlikely(!in_skb)) + return -EINVAL; + ret = wg_socket_endpoint_from_skb(&endpoint, in_skb); + if (unlikely(ret < 0)) + return ret; + + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (unlikely(!skb)) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + skb_set_inner_network_header(skb, 0); + skb_put_data(skb, buffer, len); + + if (endpoint.addr.sa_family == AF_INET) + ret = send4(wg, skb, &endpoint, 0, NULL); + else if (endpoint.addr.sa_family == AF_INET6) + ret = send6(wg, skb, &endpoint, 0, NULL); + /* No other possibilities if the endpoint is valid, which it is, + * as we checked above. 
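+ * (wg_socket_endpoint_from_skb() only ever produces AF_INET or
+ * AF_INET6 endpoints and returns -EINVAL for anything else.)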
+ */ + + return ret; +} + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb) +{ + memset(endpoint, 0, sizeof(*endpoint)); + if (skb->protocol == htons(ETH_P_IP)) { + endpoint->addr4.sin_family = AF_INET; + endpoint->addr4.sin_port = udp_hdr(skb)->source; + endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; + endpoint->src4.s_addr = ip_hdr(skb)->daddr; + endpoint->src_if4 = skb->skb_iif; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + endpoint->addr6.sin6_family = AF_INET6; + endpoint->addr6.sin6_port = udp_hdr(skb)->source; + endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; + endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id( + &ipv6_hdr(skb)->saddr, skb->skb_iif); + endpoint->src6 = ipv6_hdr(skb)->daddr; + } else { + return -EINVAL; + } + return 0; +} + +static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b) +{ + return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET && + a->addr4.sin_port == b->addr4.sin_port && + a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr && + a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) || + (a->addr.sa_family == AF_INET6 && + b->addr.sa_family == AF_INET6 && + a->addr6.sin6_port == b->addr6.sin6_port && + ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) && + a->addr6.sin6_scope_id == b->addr6.sin6_scope_id && + ipv6_addr_equal(&a->src6, &b->src6)) || + unlikely(!a->addr.sa_family && !b->addr.sa_family); +} + +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint) +{ + /* First we check unlocked, in order to optimize, since it's pretty rare + * that an endpoint will change. If we happen to be mid-write, and two + * CPUs wind up writing the same thing or something slightly different, + * it doesn't really matter much either. 
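The comment above justifies an unlocked pre-check: endpoint changes are rare, and a racy read at worst causes one redundant locked update. Reduced to its essentials, the pattern looks like the sketch below; struct example_cache and its fields are illustrative, not driver types.

struct example_cache {
        rwlock_t lock;
        int value;
};

static void example_set(struct example_cache *c, int v)
{
        if (READ_ONCE(c->value) == v)   /* unlocked fast path; a stale read
                                         * only costs a redundant update */
                return;
        write_lock_bh(&c->lock);
        c->value = v;
        write_unlock_bh(&c->lock);
}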
+ */ + if (endpoint_eq(endpoint, &peer->endpoint)) + return; + write_lock_bh(&peer->endpoint_lock); + if (endpoint->addr.sa_family == AF_INET) { + peer->endpoint.addr4 = endpoint->addr4; + peer->endpoint.src4 = endpoint->src4; + peer->endpoint.src_if4 = endpoint->src_if4; + } else if (endpoint->addr.sa_family == AF_INET6) { + peer->endpoint.addr6 = endpoint->addr6; + peer->endpoint.src6 = endpoint->src6; + } else { + goto out; + } + dst_cache_reset(&peer->endpoint_cache); +out: + write_unlock_bh(&peer->endpoint_lock); +} + +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb) +{ + struct endpoint endpoint; + + if (!wg_socket_endpoint_from_skb(&endpoint, skb)) + wg_socket_set_peer_endpoint(peer, &endpoint); +} + +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) +{ + write_lock_bh(&peer->endpoint_lock); + memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); + dst_cache_reset(&peer->endpoint_cache); + write_unlock_bh(&peer->endpoint_lock); +} + +static int wg_receive(struct sock *sk, struct sk_buff *skb) +{ + struct wg_device *wg; + + if (unlikely(!sk)) + goto err; + wg = sk->sk_user_data; + if (unlikely(!wg)) + goto err; + skb_mark_not_on_list(skb); + wg_packet_receive(wg, skb); + return 0; + +err: + kfree_skb(skb); + return 0; +} + +static void sock_free(struct sock *sock) +{ + if (unlikely(!sock)) + return; + sk_clear_memalloc(sock); + udp_tunnel_sock_release(sock->sk_socket); +} + +static void set_sock_opts(struct socket *sock) +{ + sock->sk->sk_allocation = GFP_ATOMIC; + sock->sk->sk_sndbuf = INT_MAX; + sk_set_memalloc(sock->sk); +} + +int wg_socket_init(struct wg_device *wg, u16 port) +{ + int ret; + struct udp_tunnel_sock_cfg cfg = { + .sk_user_data = wg, + .encap_type = 1, + .encap_rcv = wg_receive + }; + struct socket *new4 = NULL, *new6 = NULL; + struct udp_port_cfg port4 = { + .family = AF_INET, + .local_ip.s_addr = htonl(INADDR_ANY), + .local_udp_port = htons(port), + .use_udp_checksums = true + }; +#if IS_ENABLED(CONFIG_IPV6) + int retries = 0; + struct udp_port_cfg port6 = { + .family = AF_INET6, + .local_ip6 = IN6ADDR_ANY_INIT, + .use_udp6_tx_checksums = true, + .use_udp6_rx_checksums = true, + .ipv6_v6only = true + }; +#endif + +#if IS_ENABLED(CONFIG_IPV6) +retry: +#endif + + ret = udp_sock_create(wg->creating_net, &port4, &new4); + if (ret < 0) { + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name); + return ret; + } + set_sock_opts(new4); + setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); + +#if IS_ENABLED(CONFIG_IPV6) + if (ipv6_mod_enabled()) { + port6.local_udp_port = inet_sk(new4->sk)->inet_sport; + ret = udp_sock_create(wg->creating_net, &port6, &new6); + if (ret < 0) { + udp_tunnel_sock_release(new4); + if (ret == -EADDRINUSE && !port && retries++ < 100) + goto retry; + pr_err("%s: Could not create IPv6 socket\n", + wg->dev->name); + return ret; + } + set_sock_opts(new6); + setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); + } +#endif + + wg_socket_reinit(wg, new4->sk, new6 ? 
new6->sk : NULL); + return 0; +} + +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6) +{ + struct sock *old4, *old6; + + mutex_lock(&wg->socket_update_lock); + old4 = rcu_dereference_protected(wg->sock4, + lockdep_is_held(&wg->socket_update_lock)); + old6 = rcu_dereference_protected(wg->sock6, + lockdep_is_held(&wg->socket_update_lock)); + rcu_assign_pointer(wg->sock4, new4); + rcu_assign_pointer(wg->sock6, new6); + if (new4) + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); + mutex_unlock(&wg->socket_update_lock); + synchronize_rcu(); + sock_free(old4); + sock_free(old6); +} diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h new file mode 100644 index 000000000000..bab5848efbcd --- /dev/null +++ b/drivers/net/wireguard/socket.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_SOCKET_H +#define _WG_SOCKET_H + +#include +#include +#include +#include + +int wg_socket_init(struct wg_device *wg, u16 port); +void wg_socket_reinit(struct wg_device *wg, struct sock *new4, + struct sock *new6); +int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data, + size_t len, u8 ds); +int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, + u8 ds); +int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg, + struct sk_buff *in_skb, + void *out_buffer, size_t len); + +int wg_socket_endpoint_from_skb(struct endpoint *endpoint, + const struct sk_buff *skb); +void wg_socket_set_peer_endpoint(struct wg_peer *peer, + const struct endpoint *endpoint); +void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer, + const struct sk_buff *skb); +void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer); + +#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG) +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \ + struct endpoint __endpoint; \ + wg_socket_endpoint_from_skb(&__endpoint, skb); \ + net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \ + ##__VA_ARGS__); \ + } while (0) +#else +#define net_dbg_skb_ratelimited(fmt, skb, ...) +#endif + +#endif /* _WG_SOCKET_H */ diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c new file mode 100644 index 000000000000..d54d32ac9bc4 --- /dev/null +++ b/drivers/net/wireguard/timers.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "timers.h" +#include "device.h" +#include "peer.h" +#include "queueing.h" +#include "socket.h" + +/* + * - Timer for retransmitting the handshake if we don't hear back after + * `REKEY_TIMEOUT + jitter` ms. + * + * - Timer for sending empty packet if we have received a packet but after have + * not sent one for `KEEPALIVE_TIMEOUT` ms. + * + * - Timer for initiating new handshake if we have sent a packet but after have + * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) + + * jitter` ms. + * + * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms + * if no new keys have been received. + * + * - Timer for, if enabled, sending an empty authenticated packet every user- + * specified seconds. 
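For the first timer in the list above, the arming arithmetic appears in wg_timers_handshake_initiated() further down: the deadline is REKEY_TIMEOUT seconds plus a random jitter. The sketch below spells that out with the protocol's usual constants (REKEY_TIMEOUT of 5 seconds, jitter of up to roughly a third of a second); treat the values as assumptions, since the defining header is not part of this hunk.

#define EXAMPLE_REKEY_TIMEOUT 5                 /* seconds */
#define EXAMPLE_JITTER_MAX_JIFFIES (HZ / 3)     /* ~333 ms */

static void example_arm_retransmit(struct timer_list *t)
{
        unsigned long expires = jiffies + EXAMPLE_REKEY_TIMEOUT * HZ +
                                prandom_u32_max(EXAMPLE_JITTER_MAX_JIFFIES);

        mod_timer(t, expires);  /* fires unless the handshake completes first */
}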
+ */
+
+static inline void mod_peer_timer(struct wg_peer *peer,
+                                  struct timer_list *timer,
+                                  unsigned long expires)
+{
+        rcu_read_lock_bh();
+        if (likely(netif_running(peer->device->dev) &&
+                   !READ_ONCE(peer->is_dead)))
+                mod_timer(timer, expires);
+        rcu_read_unlock_bh();
+}
+
+static void wg_expired_retransmit_handshake(struct timer_list *timer)
+{
+        struct wg_peer *peer = from_timer(peer, timer,
+                                          timer_retransmit_handshake);
+
+        if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
+                pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
+                         peer->device->dev->name, peer->internal_id,
+                         &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
+
+                del_timer(&peer->timer_send_keepalive);
+                /* We drop all packets without a keypair and don't try again
+                 * if we try unsuccessfully for too long to make a handshake.
+                 */
+                wg_packet_purge_staged_packets(peer);
+
+                /* We set a timer for destroying any residue that might be left
+                 * of a partial exchange.
+                 */
+                if (!timer_pending(&peer->timer_zero_key_material))
+                        mod_peer_timer(peer, &peer->timer_zero_key_material,
+                                       jiffies + REJECT_AFTER_TIME * 3 * HZ);
+        } else {
+                ++peer->timer_handshake_attempts;
+                pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
+                         peer->device->dev->name, peer->internal_id,
+                         &peer->endpoint.addr, REKEY_TIMEOUT,
+                         peer->timer_handshake_attempts + 1);
+
+                /* We clear the endpoint src address, in case this is
+                 * the cause of trouble.
+                 */
+                wg_socket_clear_peer_endpoint_src(peer);
+
+                wg_packet_send_queued_handshake_initiation(peer, true);
+        }
+}
+
+static void wg_expired_send_keepalive(struct timer_list *timer)
+{
+        struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive);
+
+        wg_packet_send_keepalive(peer);
+        if (peer->timer_need_another_keepalive) {
+                peer->timer_need_another_keepalive = false;
+                mod_peer_timer(peer, &peer->timer_send_keepalive,
+                               jiffies + KEEPALIVE_TIMEOUT * HZ);
+        }
+}
+
+static void wg_expired_new_handshake(struct timer_list *timer)
+{
+        struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake);
+
+        pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
+                 peer->device->dev->name, peer->internal_id,
+                 &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
+        /* We clear the endpoint src address, in case this is the cause
+         * of trouble.
+         */
+        wg_socket_clear_peer_endpoint_src(peer);
+        wg_packet_send_queued_handshake_initiation(peer, false);
+}
+
+static void wg_expired_zero_key_material(struct timer_list *timer)
+{
+        struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material);
+
+        rcu_read_lock_bh();
+        if (!READ_ONCE(peer->is_dead)) {
+                wg_peer_get(peer);
+                if (!queue_work(peer->device->handshake_send_wq,
+                                &peer->clear_peer_work))
+                        /* If the work was already on the queue, we want to drop
+                         * the extra reference.
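The queue_work() call above encodes a reference-counting invariant: one peer reference per queued work item. If the work was already pending, queue_work() returns false and the just-taken reference is dropped again. The invariant in isolation (a sketch; system_wq stands in for the driver's dedicated workqueue):

static void example_schedule_clear(struct wg_peer *peer)
{
        wg_peer_get(peer);              /* reference travels with the work */
        if (!queue_work(system_wq, &peer->clear_peer_work))
                wg_peer_put(peer);      /* already queued: give it back */
}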
+ */
+                        wg_peer_put(peer);
+        }
+        rcu_read_unlock_bh();
+}
+
+static void wg_queued_expired_zero_key_material(struct work_struct *work)
+{
+        struct wg_peer *peer = container_of(work, struct wg_peer,
+                                            clear_peer_work);
+
+        pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
+                 peer->device->dev->name, peer->internal_id,
+                 &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
+        wg_noise_handshake_clear(&peer->handshake);
+        wg_noise_keypairs_clear(&peer->keypairs);
+        wg_peer_put(peer);
+}
+
+static void wg_expired_send_persistent_keepalive(struct timer_list *timer)
+{
+        struct wg_peer *peer = from_timer(peer, timer,
+                                          timer_persistent_keepalive);
+
+        if (likely(peer->persistent_keepalive_interval))
+                wg_packet_send_keepalive(peer);
+}
+
+/* Should be called after an authenticated data packet is sent. */
+void wg_timers_data_sent(struct wg_peer *peer)
+{
+        if (!timer_pending(&peer->timer_new_handshake))
+                mod_peer_timer(peer, &peer->timer_new_handshake,
+                        jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ +
+                        prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
+}
+
+/* Should be called after an authenticated data packet is received. */
+void wg_timers_data_received(struct wg_peer *peer)
+{
+        if (likely(netif_running(peer->device->dev))) {
+                if (!timer_pending(&peer->timer_send_keepalive))
+                        mod_peer_timer(peer, &peer->timer_send_keepalive,
+                                       jiffies + KEEPALIVE_TIMEOUT * HZ);
+                else
+                        peer->timer_need_another_keepalive = true;
+        }
+}
+
+/* Should be called after any type of authenticated packet is sent, whether
+ * keepalive, data, or handshake.
+ */
+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer)
+{
+        del_timer(&peer->timer_send_keepalive);
+}
+
+/* Should be called after any type of authenticated packet is received, whether
+ * keepalive, data, or handshake.
+ */
+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer)
+{
+        del_timer(&peer->timer_new_handshake);
+}
+
+/* Should be called after a handshake initiation message is sent. */
+void wg_timers_handshake_initiated(struct wg_peer *peer)
+{
+        mod_peer_timer(peer, &peer->timer_retransmit_handshake,
+                       jiffies + REKEY_TIMEOUT * HZ +
+                       prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
+}
+
+/* Should be called after a handshake response message is received and processed
+ * or when getting key confirmation via the first data message.
+ */
+void wg_timers_handshake_complete(struct wg_peer *peer)
+{
+        del_timer(&peer->timer_retransmit_handshake);
+        peer->timer_handshake_attempts = 0;
+        peer->sent_lastminute_handshake = false;
+        ktime_get_real_ts64(&peer->walltime_last_handshake);
+}
+
+/* Should be called after an ephemeral key is created, which is before sending a
+ * handshake response or after receiving a handshake response.
+ */
+void wg_timers_session_derived(struct wg_peer *peer)
+{
+        mod_peer_timer(peer, &peer->timer_zero_key_material,
+                       jiffies + REJECT_AFTER_TIME * 3 * HZ);
+}
+
+/* Should be called before a packet with authentication, whether
+ * keepalive, data, or handshake, is sent, or after one is received.
+ */ +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer) +{ + if (peer->persistent_keepalive_interval) + mod_peer_timer(peer, &peer->timer_persistent_keepalive, + jiffies + peer->persistent_keepalive_interval * HZ); +} + +void wg_timers_init(struct wg_peer *peer) +{ + timer_setup(&peer->timer_retransmit_handshake, + wg_expired_retransmit_handshake, 0); + timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0); + timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0); + timer_setup(&peer->timer_zero_key_material, + wg_expired_zero_key_material, 0); + timer_setup(&peer->timer_persistent_keepalive, + wg_expired_send_persistent_keepalive, 0); + INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material); + peer->timer_handshake_attempts = 0; + peer->sent_lastminute_handshake = false; + peer->timer_need_another_keepalive = false; +} + +void wg_timers_stop(struct wg_peer *peer) +{ + del_timer_sync(&peer->timer_retransmit_handshake); + del_timer_sync(&peer->timer_send_keepalive); + del_timer_sync(&peer->timer_new_handshake); + del_timer_sync(&peer->timer_zero_key_material); + del_timer_sync(&peer->timer_persistent_keepalive); + flush_work(&peer->clear_peer_work); +} diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h new file mode 100644 index 000000000000..f0653dcb1326 --- /dev/null +++ b/drivers/net/wireguard/timers.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _WG_TIMERS_H +#define _WG_TIMERS_H + +#include + +struct wg_peer; + +void wg_timers_init(struct wg_peer *peer); +void wg_timers_stop(struct wg_peer *peer); +void wg_timers_data_sent(struct wg_peer *peer); +void wg_timers_data_received(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_received(struct wg_peer *peer); +void wg_timers_handshake_initiated(struct wg_peer *peer); +void wg_timers_handshake_complete(struct wg_peer *peer); +void wg_timers_session_derived(struct wg_peer *peer); +void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer); + +static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds, + u64 expiration_seconds) +{ + return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC) + <= (s64)ktime_get_coarse_boottime_ns(); +} + +#endif /* _WG_TIMERS_H */ diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h new file mode 100644 index 000000000000..a1a269a11634 --- /dev/null +++ b/drivers/net/wireguard/version.h @@ -0,0 +1 @@ +#define WIREGUARD_VERSION "1.0.0" diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index bb44f5a0941b..4822a65f6f3c 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1604,11 +1604,22 @@ static int ath10k_pci_dump_memory_reg(struct ath10k *ar, { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); u32 i; + int ret; + + mutex_lock(&ar->conf_mutex); + if (ar->state != ATH10K_STATE_ON) { + ath10k_warn(ar, "Skipping pci_dump_memory_reg invalid state\n"); + ret = -EIO; + goto done; + } for (i = 0; i < region->len; i += 4) *(u32 *)(buf + i) = ioread32(ar_pci->mem + region->start + i); - return region->len; + ret = region->len; +done: + mutex_unlock(&ar->conf_mutex); + return ret; } /* if an error happened returns < 0, otherwise the length */ @@ -1704,7 +1715,11 @@ static void 
ath10k_pci_dump_memory(struct ath10k *ar, count = ath10k_pci_dump_memory_sram(ar, current_region, buf); break; case ATH10K_MEM_REGION_TYPE_IOREG: - count = ath10k_pci_dump_memory_reg(ar, current_region, buf); + ret = ath10k_pci_dump_memory_reg(ar, current_region, buf); + if (ret < 0) + break; + + count = ret; break; default: ret = ath10k_pci_dump_memory_generic(ar, current_region, buf); diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 16177497bba7..7e85c4916e7f 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1563,13 +1563,16 @@ static int ath10k_snoc_probe(struct platform_device *pdev) ret = ath10k_qmi_init(ar, msa_size); if (ret) { ath10k_warn(ar, "failed to register wlfw qmi client: %d\n", ret); - goto err_core_destroy; + goto err_power_off; } ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc probe\n"); return 0; +err_power_off: + ath10k_hw_power_off(ar); + err_free_irq: ath10k_snoc_free_irq(ar); diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 69a1ec53df29..7b358484940e 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -3707,6 +3707,7 @@ ath10k_wmi_tlv_op_gen_config_pno_start(struct ath10k *ar, struct wmi_tlv *tlv; struct sk_buff *skb; __le32 *channel_list; + u16 tlv_len; size_t len; void *ptr; u32 i; @@ -3764,10 +3765,12 @@ ath10k_wmi_tlv_op_gen_config_pno_start(struct ath10k *ar, /* nlo_configured_parameters(nlo_list) */ cmd->no_of_ssids = __cpu_to_le32(min_t(u8, pno->uc_networks_count, WMI_NLO_MAX_SSIDS)); + tlv_len = __le32_to_cpu(cmd->no_of_ssids) * + sizeof(struct nlo_configured_parameters); tlv = ptr; tlv->tag = __cpu_to_le16(WMI_TLV_TAG_ARRAY_STRUCT); - tlv->len = __cpu_to_le16(len); + tlv->len = __cpu_to_le16(tlv_len); ptr += sizeof(*tlv); nlo_list = ptr; diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 9f564e2b7a14..214d65108b29 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -9476,7 +9476,7 @@ static int ath10k_wmi_mgmt_tx_clean_up_pending(int msdu_id, void *ptr, msdu = pkt_addr->vaddr; dma_unmap_single(ar->dev, pkt_addr->paddr, - msdu->len, DMA_FROM_DEVICE); + msdu->len, DMA_TO_DEVICE); ieee80211_free_txskb(ar->hw, msdu); return 0; diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index fb649d85b8fc..dd0c32379375 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1216,7 +1216,7 @@ static void ath9k_hif_usb_firmware_cb(const struct firmware *fw, void *context) static int send_eject_command(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); - struct usb_host_interface *iface_desc = &interface->altsetting[0]; + struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; diff --git a/drivers/net/wireless/ath/wil6210/txrx_edma.c b/drivers/net/wireless/ath/wil6210/txrx_edma.c index 778b63be6a9a..02548d40253c 100644 --- a/drivers/net/wireless/ath/wil6210/txrx_edma.c +++ b/drivers/net/wireless/ath/wil6210/txrx_edma.c @@ -869,6 +869,7 @@ static struct sk_buff *wil_sring_reap_rx_edma(struct wil6210_priv *wil, u8 data_offset; struct wil_rx_status_extended *s; u16 sring_idx = sring - wil->srings; + int invalid_buff_id_retry; BUILD_BUG_ON(sizeof(struct 
wil_rx_status_extended) > sizeof(skb->cb)); @@ -882,9 +883,9 @@ static struct sk_buff *wil_sring_reap_rx_edma(struct wil6210_priv *wil, /* Extract the buffer ID from the status message */ buff_id = le16_to_cpu(wil_rx_status_get_buff_id(msg)); + invalid_buff_id_retry = 0; while (!buff_id) { struct wil_rx_status_extended *s; - int invalid_buff_id_retry = 0; wil_dbg_txrx(wil, "buff_id is not updated yet by HW, (swhead 0x%x)\n", diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index 4325e91736eb..8b6b657c4b85 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -1275,8 +1275,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev) } /* Interrupt handler bottom-half */ -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) +static void b43legacy_interrupt_tasklet(unsigned long data) { + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data; u32 reason; u32 dma_reason[ARRAY_SIZE(dev->dma_reason)]; u32 merged_dma_reason = 0; @@ -3741,7 +3742,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev, b43legacy_set_status(wldev, B43legacy_STAT_UNINIT); wldev->bad_frames_preempt = modparam_bad_frames_preempt; tasklet_init(&wldev->isr_tasklet, - (void (*)(unsigned long))b43legacy_interrupt_tasklet, + b43legacy_interrupt_tasklet, (unsigned long)wldev); if (modparam_pio) wldev->__using_pio = true; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 7ba9f6a68645..1f5deea5a288 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -2092,7 +2092,8 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p, /* firmware requires unique mac address for p2pdev interface */ if (addr && ether_addr_equal(addr, pri_ifp->mac_addr)) { bphy_err(drvr, "discovery vif must be different from primary interface\n"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto fail; } brcmf_p2p_generate_bss_mac(p2p, addr); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 264ad63232f8..a935993a3c51 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -1935,6 +1935,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes) BRCMF_SDIO_FT_NORMAL)) { rd->len = 0; brcmu_pkt_buf_free_skb(pkt); + continue; } bus->sdcnt.rx_readahead_cnt++; if (rd->len != roundup(rd_new.len, 16)) { @@ -4225,6 +4226,12 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err, } if (err == 0) { + /* Assign bus interface call back */ + sdiod->bus_if->dev = sdiod->dev; + sdiod->bus_if->ops = &brcmf_sdio_bus_ops; + sdiod->bus_if->chip = bus->ci->chip; + sdiod->bus_if->chiprev = bus->ci->chiprev; + /* Allow full data communication using DPC from now on. 
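The sdio.c hunk above moves the bus_if field assignments ahead of the switch to BRCMF_SDIOD_DATA, so the interface is fully populated before any context that observes the new state can use it. The ordering rule in miniature, with hypothetical names:

struct example_bus {
        const struct example_ops *ops;  /* must be set before publish */
        int state;
};

static void example_go_live(struct example_bus *bus,
                            const struct example_ops *ops)
{
        bus->ops = ops;                 /* 1. initialize every field */
        smp_wmb();                      /* 2. order writes before the flag */
        WRITE_ONCE(bus->state, 1);      /* 3. publish; consumers may now run */
}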
*/ brcmf_sdiod_change_state(bus->sdiodev, BRCMF_SDIOD_DATA); @@ -4241,12 +4248,6 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err, sdio_release_host(sdiod->func1); - /* Assign bus interface call back */ - sdiod->bus_if->dev = sdiod->dev; - sdiod->bus_if->ops = &brcmf_sdio_bus_ops; - sdiod->bus_if->chip = bus->ci->chip; - sdiod->bus_if->chiprev = bus->ci->chiprev; - err = brcmf_alloc(sdiod->dev, sdiod->settings); if (err) { brcmf_err("brcmf_alloc failed\n"); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 06f3c01f10b3..575ed19e9195 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -430,6 +430,7 @@ brcmf_usbdev_qinit(struct list_head *q, int qsize) usb_free_urb(req->urb); list_del(q->next); } + kfree(reqs); return NULL; } @@ -1348,7 +1349,7 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) goto fail; } - desc = &intf->altsetting[0].desc; + desc = &intf->cur_altsetting->desc; if ((desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || (desc->bInterfaceSubClass != 2) || (desc->bInterfaceProtocol != 0xff)) { @@ -1361,7 +1362,7 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) num_of_eps = desc->bNumEndpoints; for (ep = 0; ep < num_of_eps; ep++) { - endpoint = &intf->altsetting[0].endpoint[ep].desc; + endpoint = &intf->cur_altsetting->endpoint[ep].desc; endpoint_num = usb_endpoint_num(endpoint); if (!usb_endpoint_xfer_bulk(endpoint)) continue; diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index c4c83ab60cbc..0579554ed4b3 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -3206,8 +3206,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) } } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv) +static void ipw2100_irq_tasklet(unsigned long data) { + struct ipw2100_priv *priv = (struct ipw2100_priv *)data; struct net_device *dev = priv->net_dev; unsigned long flags; u32 inta, tmp; @@ -6006,7 +6007,7 @@ static void ipw2100_rf_kill(struct work_struct *work) spin_unlock_irqrestore(&priv->low_lock, flags); } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv); +static void ipw2100_irq_tasklet(unsigned long data); static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, @@ -6136,7 +6137,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill); INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event); - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw2100_irq_tasklet, (unsigned long)priv); /* NOTE: We do not start the deferred work for status checks yet */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 31e43fc1d12b..5ef6f87a48ac 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -1945,8 +1945,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv) wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL); } -static void ipw_irq_tasklet(struct ipw_priv *priv) +static void ipw_irq_tasklet(unsigned long data) { + struct ipw_priv *priv = (struct ipw_priv *)data; u32 inta, inta_mask, handled = 0; unsigned long flags; int rc = 0; @@ -10677,7 +10678,7 @@ 
static int ipw_setup_deferred_work(struct ipw_priv *priv) INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate); #endif /* CONFIG_IPW2200_QOS */ - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw_irq_tasklet, (unsigned long)priv); return ret; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 1168055da182..206b43b9dff8 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -1376,8 +1376,9 @@ il3945_dump_nic_error_log(struct il_priv *il) } static void -il3945_irq_tasklet(struct il_priv *il) +il3945_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -3401,7 +3402,7 @@ il3945_setup_deferred_work(struct il_priv *il) timer_setup(&il->watchdog, il_bg_watchdog, 0); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il3945_irq_tasklet, + il3945_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index 3664f56f8cbd..d1e17589dbeb 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4343,8 +4343,9 @@ il4965_synchronize_irq(struct il_priv *il) } static void -il4965_irq_tasklet(struct il_priv *il) +il4965_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -6237,7 +6238,7 @@ il4965_setup_deferred_work(struct il_priv *il) timer_setup(&il->watchdog, il_bg_watchdog, 0); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il4965_irq_tasklet, + il4965_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index d966b29b45ee..348c17ce72f5 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -699,7 +699,7 @@ il_eeprom_init(struct il_priv *il) u32 gp = _il_rd(il, CSR_EEPROM_GP); int sz; int ret; - u16 addr; + int addr; /* allocate eeprom */ sz = il->cfg->eeprom_size; diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 48d375a86d86..ba2aff3af0fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -27,7 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019 - 2020 Intel Corporation * All rights reserved. 
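The ipw2x00 and iwlegacy hunks above all make the same conversion: instead of casting a handler that takes a driver-private pointer, the handler is declared with the unsigned long parameter that tasklet_init() actually passes, and recovers the pointer itself. A condensed sketch with illustrative names:

static void example_irq_tasklet(unsigned long data)
{
        struct example_priv *priv = (struct example_priv *)data;

        /* ... bottom-half processing ... */
}

static void example_setup(struct example_priv *priv)
{
        /* no function-pointer cast needed: the handler already matches
         * the void (*)(unsigned long) prototype */
        tasklet_init(&priv->irq_tasklet, example_irq_tasklet,
                     (unsigned long)priv);
}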
* * Redistribution and use in source and binary forms, with or without @@ -491,13 +491,13 @@ int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt, } IWL_EXPORT_SYMBOL(iwl_validate_sar_geo_profile); -void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, - struct iwl_per_chain_offset_group *table) +int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, + struct iwl_per_chain_offset_group *table) { int ret, i, j; if (!iwl_sar_geo_support(fwrt)) - return; + return -EOPNOTSUPP; ret = iwl_sar_get_wgds_table(fwrt); if (ret < 0) { @@ -505,7 +505,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, "Geo SAR BIOS table invalid or unavailable. (%d)\n", ret); /* we don't fail if the table is not available */ - return; + return -ENOENT; } BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS * @@ -530,5 +530,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, i, j, value[1], value[2], value[0]); } } + + return 0; } IWL_EXPORT_SYMBOL(iwl_sar_geo_init); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 4a6e8262974b..5590e5cc8fbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -27,7 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -171,8 +171,9 @@ bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt); int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt, struct iwl_host_cmd *cmd); -void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, - struct iwl_per_chain_offset_group *table); +int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, + struct iwl_per_chain_offset_group *table); + #else /* CONFIG_ACPI */ static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method) @@ -243,9 +244,10 @@ static inline int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt, return -ENOENT; } -static inline void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, - struct iwl_per_chain_offset_group *table) +static inline int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, + struct iwl_per_chain_offset_group *table) { + return -ENOENT; } #endif /* CONFIG_ACPI */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index 9f4b117db9d7..d47f76890cf9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -8,6 +8,7 @@ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * Copyright (C) 2018 Intel Corporation * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,6 +31,7 @@ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH * Copyright (C) 2018 Intel Corporation * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2020 Intel Corporation * All rights reserved. 
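Because iwl_sar_geo_init() now returns an int, a caller can tell "unsupported platform or missing table" apart from success and simply skip the firmware command, which is what the mvm/fw.c hunk further below does. A reduced sketch of that split, with hypothetical names:

static int example_geo_init(struct example_fw *fw)
{
        if (!example_geo_support(fw))
                return -EOPNOTSUPP;     /* platform has no geo SAR at all */
        if (example_get_wgds_table(fw) < 0)
                return -ENOENT;         /* table absent: valid, nothing to do */
        /* ... fill the per-chain offset table ... */
        return 0;
}

static int example_send_geo_cmd(struct example_fw *fw)
{
        if (example_geo_init(fw))
                return 0;               /* nothing to configure is not an error */
        return example_send_cmd(fw);
}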
* * Redistribution and use in source and binary forms, with or without @@ -389,6 +391,8 @@ void iwl_mvm_ftm_abort(struct iwl_mvm *mvm, struct cfg80211_pmsr_request *req) if (req != mvm->ftm_initiator.req) return; + iwl_mvm_ftm_reset(mvm); + if (iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_RANGE_ABORT_CMD, LOCATION_GROUP, 0), 0, sizeof(cmd), &cmd)) @@ -502,7 +506,6 @@ void iwl_mvm_ftm_range_resp(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) lockdep_assert_held(&mvm->mutex); if (!mvm->ftm_initiator.req) { - IWL_ERR(mvm, "Got FTM response but have no request?\n"); return; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index c09624d8d7ee..81b7da5815eb 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -749,10 +749,17 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) u16 cmd_wide_id = WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT); union geo_tx_power_profiles_cmd cmd; u16 len; + int ret; cmd.geo_cmd.ops = cpu_to_le32(IWL_PER_CHAIN_OFFSET_SET_TABLES); - iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table); + ret = iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table); + /* + * It is a valid scenario to not support SAR, or miss wgds table, + * but in that case there is no need to send the command. + */ + if (ret) + return 0; cmd.geo_cmd.table_revision = cpu_to_le32(mvm->fwrt.geo_rev); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6717f25c46b1..02df603b6400 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -5,10 +5,9 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -28,10 +27,9 @@ * * BSD LICENSE * - * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation * All rights reserved. 
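In the mac80211.c hunk just below, rcu_dereference() can legitimately yield NULL for an empty slot, which IS_ERR() does not catch, so the guard becomes IS_ERR_OR_NULL(). The shape of that check, as a sketch with illustrative names:

static void example_cfg_sta(struct example_mvm *mvm, int sta_id)
{
        struct ieee80211_sta *sta;

        rcu_read_lock();
        sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]);
        if (IS_ERR_OR_NULL(sta)) {      /* empty slot or error pointer */
                rcu_read_unlock();
                return;
        }
        /* ... sta is safe to dereference here ... */
        rcu_read_unlock();
}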
* * Redistribution and use in source and binary forms, with or without @@ -2037,7 +2035,7 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, rcu_read_lock(); sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_ctxt_cmd.sta_id]); - if (IS_ERR(sta)) { + if (IS_ERR_OR_NULL(sta)) { rcu_read_unlock(); WARN(1, "Can't find STA to configure HE\n"); return; @@ -3293,7 +3291,7 @@ static void iwl_mvm_mac_mgd_prepare_tx(struct ieee80211_hw *hw, if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) iwl_mvm_schedule_session_protection(mvm, vif, 900, - min_duration); + min_duration, false); else iwl_mvm_protect_session(mvm, vif, duration, min_duration, 500, false); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 46128a2a9c6e..e98ce380c7b9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -308,7 +308,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) } /* PHY_SKU section is mandatory in B0 */ - if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { + if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT && + !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { IWL_ERR(mvm, "Can't parse phy_sku in B0, empty sections\n"); return NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 7b35f416404c..64ef3f3ba23b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -3320,6 +3320,10 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, igtk_cmd.sta_id = cpu_to_le32(sta_id); if (remove_key) { + /* This is a valid situation for IGTK */ + if (sta_id == IWL_MVM_INVALID_STA) + return 0; + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID); } else { struct ieee80211_key_seq seq; @@ -3574,9 +3578,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c index 1851719e9f4b..d781777b6b96 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c @@ -205,9 +205,15 @@ void iwl_mvm_mac_mgd_protect_tdls_discover(struct ieee80211_hw *hw, struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); u32 duration = 2 * vif->bss_conf.dtim_period * vif->bss_conf.beacon_int; - mutex_lock(&mvm->mutex); /* Protect the session to hear the TDLS setup response on the channel */ - iwl_mvm_protect_session(mvm, vif, duration, duration, 100, true); + mutex_lock(&mvm->mutex); + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) + iwl_mvm_schedule_session_protection(mvm, vif, duration, + duration, true); + else + iwl_mvm_protect_session(mvm, vif, duration, + duration, 100, true); mutex_unlock(&mvm->mutex); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 51b138673ddb..c0b420fe5e48 100644 --- 
a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1056,13 +1056,42 @@ int iwl_mvm_schedule_csa_period(struct iwl_mvm *mvm, return iwl_mvm_time_event_send_add(mvm, vif, te_data, &time_cmd); } +static bool iwl_mvm_session_prot_notif(struct iwl_notif_wait_data *notif_wait, + struct iwl_rx_packet *pkt, void *data) +{ + struct iwl_mvm *mvm = + container_of(notif_wait, struct iwl_mvm, notif_wait); + struct iwl_mvm_session_prot_notif *resp; + int resp_len = iwl_rx_packet_payload_len(pkt); + + if (WARN_ON(pkt->hdr.cmd != SESSION_PROTECTION_NOTIF || + pkt->hdr.group_id != MAC_CONF_GROUP)) + return true; + + if (WARN_ON_ONCE(resp_len != sizeof(*resp))) { + IWL_ERR(mvm, "Invalid SESSION_PROTECTION_NOTIF response\n"); + return true; + } + + resp = (void *)pkt->data; + + if (!resp->status) + IWL_ERR(mvm, + "TIME_EVENT_NOTIFICATION received but not executed\n"); + + return true; +} + void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, struct ieee80211_vif *vif, - u32 duration, u32 min_duration) + u32 duration, u32 min_duration, + bool wait_for_notif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm_time_event_data *te_data = &mvmvif->time_event_data; - + const u16 notif[] = { iwl_cmd_id(SESSION_PROTECTION_NOTIF, + MAC_CONF_GROUP, 0) }; + struct iwl_notification_wait wait_notif; struct iwl_mvm_session_prot_cmd cmd = { .id_and_color = cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, @@ -1071,7 +1100,6 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, .conf_id = cpu_to_le32(SESSION_PROTECT_CONF_ASSOC), .duration_tu = cpu_to_le32(MSEC_TO_TU(duration)), }; - int ret; lockdep_assert_held(&mvm->mutex); @@ -1092,14 +1120,35 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, IWL_DEBUG_TE(mvm, "Add new session protection, duration %d TU\n", le32_to_cpu(cmd.duration_tu)); - ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD, - MAC_CONF_GROUP, 0), - 0, sizeof(cmd), &cmd); - if (ret) { + if (!wait_for_notif) { + if (iwl_mvm_send_cmd_pdu(mvm, + iwl_cmd_id(SESSION_PROTECTION_CMD, + MAC_CONF_GROUP, 0), + 0, sizeof(cmd), &cmd)) { + IWL_ERR(mvm, + "Couldn't send the SESSION_PROTECTION_CMD\n"); + spin_lock_bh(&mvm->time_event_lock); + iwl_mvm_te_clear_data(mvm, te_data); + spin_unlock_bh(&mvm->time_event_lock); + } + + return; + } + + iwl_init_notification_wait(&mvm->notif_wait, &wait_notif, + notif, ARRAY_SIZE(notif), + iwl_mvm_session_prot_notif, NULL); + + if (iwl_mvm_send_cmd_pdu(mvm, + iwl_cmd_id(SESSION_PROTECTION_CMD, + MAC_CONF_GROUP, 0), + 0, sizeof(cmd), &cmd)) { IWL_ERR(mvm, - "Couldn't send the SESSION_PROTECTION_CMD: %d\n", ret); - spin_lock_bh(&mvm->time_event_lock); - iwl_mvm_te_clear_data(mvm, te_data); - spin_unlock_bh(&mvm->time_event_lock); + "Couldn't send the SESSION_PROTECTION_CMD\n"); + iwl_remove_notification(&mvm->notif_wait, &wait_notif); + } else if (iwl_wait_notification(&mvm->notif_wait, &wait_notif, + TU_TO_JIFFIES(100))) { + IWL_ERR(mvm, + "Failed to protect session until session protection\n"); } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.h b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.h index df6832b79666..3186d7e40567 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.h @@ -250,10 +250,12 @@ iwl_mvm_te_scheduled(struct iwl_mvm_time_event_data *te_data) * @mvm: the mvm component * @vif: the virtual interface for which the protection issued * @duration: 
the duration of the protection + * @wait_for_notif: if true, will block until the start of the protection */ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, struct ieee80211_vif *vif, - u32 duration, u32 min_duration); + u32 duration, u32 min_duration, + bool wait_for_notif); /** * iwl_mvm_rx_session_protect_notif - handles %SESSION_PROTECTION_NOTIF diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index b5a16f00bada..fcad25ffd811 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -734,7 +734,8 @@ static struct thermal_zone_device_ops tzone_ops = { static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) { int i; - char name[] = "iwlwifi"; + char name[16]; + static atomic_t counter = ATOMIC_INIT(0); if (!iwl_mvm_is_tt_in_fw(mvm)) { mvm->tz_device.tzone = NULL; @@ -744,6 +745,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH); + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF); mvm->tz_device.tzone = thermal_zone_device_register(name, IWL_MAX_DTS_TRIPS, IWL_WRITABLE_TRIPS_MSK, diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index b0b7eca1754e..de62a6dc4e73 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1107,6 +1107,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) cfg = &iwl9560_2ac_cfg_qu_c0_jf_b0; else if (cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0) cfg = &iwl9560_2ac_160_cfg_qu_c0_jf_b0; + else if (cfg == &killer1650s_2ax_cfg_qu_b0_hr_b0) + cfg = &killer1650s_2ax_cfg_qu_c0_hr_b0; + else if (cfg == &killer1650i_2ax_cfg_qu_b0_hr_b0) + cfg = &killer1650i_2ax_cfg_qu_c0_hr_b0; } /* same thing for QuZ... 
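The tt.c hunk above replaces the fixed "iwlwifi" thermal-zone name with a per-device one, since two NICs registering the same name would collide. The same counter trick in isolation (a sketch; the 8-bit mask keeps the string within THERMAL_NAME_LENGTH):

static atomic_t example_tz_counter = ATOMIC_INIT(0);

static void example_tz_name(char *name, size_t len)
{
        snprintf(name, len, "iwlwifi_%u",
                 atomic_inc_return(&example_tz_counter) & 0xFF);
}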
*/ diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index 0094b1d2b577..3ec46f48cfde 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -2508,7 +2508,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap, sta->supported_rates[0] = 2; if (sta->tx_supp_rates & WLAN_RATE_2M) sta->supported_rates[1] = 4; - if (sta->tx_supp_rates & WLAN_RATE_5M5) + if (sta->tx_supp_rates & WLAN_RATE_5M5) sta->supported_rates[2] = 11; if (sta->tx_supp_rates & WLAN_RATE_11M) sta->supported_rates[3] = 22; diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 40a8b941ad5c..e753f43e0162 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1361,7 +1361,8 @@ static int ezusb_init(struct hermes *hw) int retval; BUG_ON(in_interrupt()); - BUG_ON(!upriv); + if (!upriv) + return -EINVAL; upriv->reply_count = 0; /* Write the MAGIC number on the simulated registers to keep @@ -1608,9 +1609,9 @@ static int ezusb_probe(struct usb_interface *interface, /* set up the endpoint information */ /* check out the endpoints */ - iface_desc = &interface->altsetting[0].desc; + iface_desc = &interface->cur_altsetting->desc; for (i = 0; i < iface_desc->bNumEndpoints; ++i) { - ep = &interface->altsetting[0].endpoint[i].desc; + ep = &interface->cur_altsetting->endpoint[i].desc; if (usb_endpoint_is_bulk_in(ep)) { /* we found a bulk in endpoint */ diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index c9401c121a14..4e3de684928b 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -1785,6 +1785,8 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, rates_max = rates_eid[1]; if (rates_max > MAX_RATES) { lbs_deb_join("invalid rates"); + rcu_read_unlock(); + ret = -EINVAL; goto out; } rates = cmd.bss.rates; diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 547ff3c578ee..fa5634af40f7 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -1295,19 +1295,6 @@ mwifiex_copy_rates(u8 *dest, u32 pos, u8 *src, int len) return pos; } -/* This function return interface number with the same bss_type. - */ -static inline u8 -mwifiex_get_intf_num(struct mwifiex_adapter *adapter, u8 bss_type) -{ - u8 i, num = 0; - - for (i = 0; i < adapter->priv_num; i++) - if (adapter->priv[i] && adapter->priv[i]->bss_type == bss_type) - num++; - return num; -} - /* * This function returns the correct private structure pointer based * upon the BSS type and BSS number. 
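The libertas and mwifiex sta_ioctl fixes above share one root cause: an early error return after rcu_read_lock() left the read-side lock held. The rule, stated as a shape with hypothetical names, is that every exit path taken after the lock must pass through the unlock:

static int example_parse(struct example_priv *priv)
{
        int ret = 0;

        rcu_read_lock();
        if (!example_valid(priv)) {
                ret = -EINVAL;
                goto out;       /* never return with the read lock held */
        }
        example_use_protected(priv);
out:
        rcu_read_unlock();
        return ret;
}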
diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 98f942b797f7..a7968a84aaf8 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -2884,6 +2884,13 @@ mwifiex_cmd_append_vsie_tlv(struct mwifiex_private *priv, vs_param_set->header.len = cpu_to_le16((((u16) priv->vs_ie[id].ie[1]) & 0x00FF) + 2); + if (le16_to_cpu(vs_param_set->header.len) > + MWIFIEX_MAX_VSIE_LEN) { + mwifiex_dbg(priv->adapter, ERROR, + "Invalid param length!\n"); + break; + } + memcpy(vs_param_set->ie, priv->vs_ie[id].ie, le16_to_cpu(vs_param_set->header.len)); *buffer += le16_to_cpu(vs_param_set->header.len) + diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 6dd835f1efc2..fbfa0b15d0c8 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -232,6 +232,7 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv, if (country_ie_len > (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { + rcu_read_unlock(); mwifiex_dbg(priv->adapter, ERROR, "11D: country_ie_len overflow!, deauth AP\n"); return -EINVAL; diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index 7caf1d26124a..f8f282ce39bd 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -894,7 +894,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, *pos, *end; u8 i, action, basic; u16 cap = 0; - int ie_len = 0; + int ies_len = 0; if (len < (sizeof(struct ethhdr) + 3)) return; @@ -916,7 +916,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, pos = buf + sizeof(struct ethhdr) + 4; /* payload 1+ category 1 + action 1 + dialog 1 */ cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; pos += 2; break; @@ -926,7 +926,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, /* payload 1+ category 1 + action 1 + dialog 1 + status code 2*/ pos = buf + sizeof(struct ethhdr) + 6; cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; pos += 2; break; @@ -934,7 +934,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, if (len < (sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN)) return; pos = buf + sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN; - ie_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; break; default: mwifiex_dbg(priv->adapter, ERROR, "Unknown TDLS frame type.\n"); @@ -947,33 +947,33 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->tdls_cap.capab = cpu_to_le16(cap); - for (end = pos + ie_len; pos + 1 < end; pos += 2 + pos[1]) { - if (pos + 2 + pos[1] > end) + for (end = pos + ies_len; pos + 1 < end; pos += 2 + pos[1]) { + u8 ie_len = pos[1]; + + if (pos + 2 + ie_len > end) break; switch (*pos) { case WLAN_EID_SUPP_RATES: - if (pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; - sta_ptr->tdls_cap.rates_len = pos[1]; - for (i = 0; i < pos[1]; i++) + sta_ptr->tdls_cap.rates_len = ie_len; + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[i] = pos[i + 2]; break; case WLAN_EID_EXT_SUPP_RATES: - if 
(pos[1] > 32) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) return; basic = sta_ptr->tdls_cap.rates_len; - if (pos[1] > 32 - basic) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates) - basic) return; - for (i = 0; i < pos[1]; i++) + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2]; - sta_ptr->tdls_cap.rates_len += pos[1]; + sta_ptr->tdls_cap.rates_len += ie_len; break; case WLAN_EID_HT_CAPABILITY: - if (pos > end - sizeof(struct ieee80211_ht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_cap)) + if (ie_len != sizeof(struct ieee80211_ht_cap)) return; /* copy the ie's value into ht_capb*/ memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2, @@ -981,59 +981,45 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->is_11n_enabled = 1; break; case WLAN_EID_HT_OPERATION: - if (pos > end - - sizeof(struct ieee80211_ht_operation) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_ht_operation)) + if (ie_len != sizeof(struct ieee80211_ht_operation)) return; /* copy the ie's value into ht_oper*/ memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2, sizeof(struct ieee80211_ht_operation)); break; case WLAN_EID_BSS_COEX_2040: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.coex_2040 = pos[2]; break; case WLAN_EID_EXT_CAPABILITY: - if (pos > end - sizeof(struct ieee_types_header)) - return; - if (pos[1] < sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] > 8) + if (ie_len > 8) return; memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], 8)); + min_t(u8, ie_len, 8)); break; case WLAN_EID_RSN: - if (pos > end - sizeof(struct ieee_types_header)) + if (ie_len < sizeof(struct ieee_types_header)) return; - if (pos[1] < sizeof(struct ieee_types_header)) - return; - if (pos[1] > IEEE_MAX_IE_SIZE - + if (ie_len > IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header)) return; memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], IEEE_MAX_IE_SIZE - + min_t(u8, ie_len, IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header))); break; case WLAN_EID_QOS_CAPA: - if (pos > end - 3) - return; - if (pos[1] != 1) + if (ie_len != sizeof(pos[2])) return; sta_ptr->tdls_cap.qos_info = pos[2]; break; case WLAN_EID_VHT_OPERATION: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_operation) - 2) - return; - if (pos[1] != + if (ie_len != sizeof(struct ieee80211_vht_operation)) return; /* copy the ie's value into vhtoper*/ @@ -1043,10 +1029,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_VHT_CAPABILITY: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - - sizeof(struct ieee80211_vht_cap) - 2) - return; - if (pos[1] != sizeof(struct ieee80211_vht_cap)) + if (ie_len != sizeof(struct ieee80211_vht_cap)) return; /* copy the ie's value into vhtcap*/ memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2, @@ -1056,9 +1039,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, break; case WLAN_EID_AID: if (priv->adapter->is_hw_11ac_capable) { - if (pos > end - 4) - return; - if (pos[1] != 2) + if (ie_len != sizeof(u16)) return; sta_ptr->tdls_cap.aid = get_unaligned_le16((pos + 2)); diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index 41f0231376c0..132f9e8ed68c 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ 
b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -970,6 +970,10 @@ int mwifiex_ret_wmm_get_status(struct mwifiex_private *priv, "WMM Parameter Set Count: %d\n", wmm_param_ie->qos_info_bitmap & mask); + if (wmm_param_ie->vend_hdr.len + 2 > + sizeof(struct ieee_types_wmm_parameter)) + break; + memcpy((u8 *) &priv->curr_bss_params.bss_descriptor. wmm_ie, wmm_param_ie, wmm_param_ie->vend_hdr.len + 2); diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 6173c80189ba..1847f55e199b 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -447,10 +447,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, struct page *page = virt_to_head_page(data); int offset = data - page_address(page); struct sk_buff *skb = q->rx_head; + struct skb_shared_info *shinfo = skb_shinfo(skb); - offset += q->buf_offset; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, len, - q->buf_size); + if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { + offset += q->buf_offset; + skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, + q->buf_size); + } if (more) return; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c index eccad4987ac8..17e277bf39e0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c @@ -92,8 +92,9 @@ static int mt7615_check_eeprom(struct mt76_dev *dev) static void mt7615_eeprom_parse_hw_cap(struct mt7615_dev *dev) { - u8 val, *eeprom = dev->mt76.eeprom.data; + u8 *eeprom = dev->mt76.eeprom.data; u8 tx_mask, rx_mask, max_nss; + u32 val; val = FIELD_GET(MT_EE_NIC_WIFI_CONF_BAND_SEL, eeprom[MT_EE_WIFI_CONF]); diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index aa2bb2ae9809..54a1a4ea107b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -6384,7 +6384,7 @@ static int rtl8xxxu_parse_usb(struct rtl8xxxu_priv *priv, u8 dir, xtype, num; int ret = 0; - host_interface = &interface->altsetting[0]; + host_interface = interface->cur_altsetting; interface_desc = &host_interface->desc; endpoints = interface_desc->bNumEndpoints; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index f88d26535978..25335bd2873b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1061,13 +1061,15 @@ static irqreturn_t _rtl_pci_interrupt(int irq, void *dev_id) return ret; } -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_irq_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; _rtl_pci_tx_chk_waitq(hw); } -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -1193,10 +1195,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw, /*task */ tasklet_init(&rtlpriv->works.irq_tasklet, - (void (*)(unsigned long))_rtl_pci_irq_tasklet, + _rtl_pci_irq_tasklet, (unsigned long)hw); tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet, - (void (*)(unsigned 
long))_rtl_pci_prepare_bcn_tasklet, + _rtl_pci_prepare_bcn_tasklet, (unsigned long)hw); INIT_WORK(&rtlpriv->works.lps_change_work, rtl_lps_change_work_callback); diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index ae61415e1665..f369ddca953a 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -706,8 +706,8 @@ void rtw_update_sta_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si) if (sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80) is_support_sgi = true; } else if (sta->ht_cap.ht_supported) { - ra_mask |= (sta->ht_cap.mcs.rx_mask[NL80211_BAND_5GHZ] << 20) | - (sta->ht_cap.mcs.rx_mask[NL80211_BAND_2GHZ] << 12); + ra_mask |= (sta->ht_cap.mcs.rx_mask[1] << 20) | + (sta->ht_cap.mcs.rx_mask[0] << 12); if (sta->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC) stbc_en = HT_STBC_EN; if (sta->ht_cap.cap & IEEE80211_HT_CAP_LDPC_CODING) @@ -717,6 +717,9 @@ void rtw_update_sta_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si) is_support_sgi = true; } + if (efuse->hw_cap.nss == 1) + ra_mask &= RA_MASK_VHT_RATES_1SS | RA_MASK_HT_RATES_1SS; + if (hal->current_band_type == RTW_BAND_5G) { ra_mask |= (u64)sta->supp_rates[NL80211_BAND_5GHZ] << 4; if (sta->vht_cap.vht_supported) { @@ -750,11 +753,6 @@ void rtw_update_sta_info(struct rtw_dev *rtwdev, struct rtw_sta_info *si) wireless_set = 0; } - if (efuse->hw_cap.nss == 1) { - ra_mask &= RA_MASK_VHT_RATES_1SS; - ra_mask &= RA_MASK_HT_RATES_1SS; - } - switch (sta->bandwidth) { case IEEE80211_STA_RX_BW_80: bw_mode = RTW_CHANNEL_WIDTH_80; diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c index a58e8276a41a..a6746b5a9ff2 100644 --- a/drivers/net/wireless/realtek/rtw88/pci.c +++ b/drivers/net/wireless/realtek/rtw88/pci.c @@ -832,6 +832,11 @@ static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci, while (count--) { skb = skb_dequeue(&ring->queue); + if (!skb) { + rtw_err(rtwdev, "failed to dequeue %d skb TX queue %d, BD=0x%08x, rp %d -> %d\n", + count, hw_queue, bd_idx, ring->r.rp, cur_rp); + break; + } tx_data = rtw_pci_get_tx_data(skb); pci_unmap_single(rtwpci->pdev, tx_data->dma, skb->len, PCI_DMA_TODEVICE); diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index f84250bdb8cf..6f8d5f9a9f7e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -622,6 +622,7 @@ static int bl_cmd(struct rsi_hw *adapter, u8 cmd, u8 exp_resp, char *str) bl_start_cmd_timer(adapter, timeout); status = bl_write_cmd(adapter, cmd, exp_resp, ®out_val); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: Command %s (%0x) writing failed..\n", __func__, str, cmd); @@ -737,10 +738,9 @@ static int ping_pong_write(struct rsi_hw *adapter, u8 cmd, u8 *addr, u32 size) } status = bl_cmd(adapter, cmd_req, cmd_resp, str); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + return 0; } @@ -828,10 +828,9 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content, status = bl_cmd(adapter, EOF_REACHED, FW_LOADING_SUCCESSFUL, "EOF_REACHED"); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + rsi_dbg(INFO_ZONE, "FW loading is done and FW is running..\n"); return 0; } @@ -849,6 +848,7 @@ static int rsi_hal_prepare_fwload(struct rsi_hw *adapter) ®out_val, RSI_COMMON_REG_SIZE); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: REGOUT read 
failed\n", __func__); return status; diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index 53f41fc2cadf..668585010cd6 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -16,6 +16,7 @@ */ #include +#include #include #include "rsi_usb.h" #include "rsi_hal.h" @@ -29,7 +30,7 @@ MODULE_PARM_DESC(dev_oper_mode, "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n" "6[AP + BT classic], 14[AP + BT classic + BT LE]"); -static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num); +static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t flags); /** * rsi_usb_card_write() - This function writes to the USB Card. @@ -117,7 +118,7 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface, __le16 buffer_size; int ii, bin_found = 0, bout_found = 0; - iface_desc = &(interface->altsetting[0]); + iface_desc = interface->cur_altsetting; for (ii = 0; ii < iface_desc->desc.bNumEndpoints; ++ii) { endpoint = &(iface_desc->endpoint[ii].desc); @@ -285,20 +286,29 @@ static void rsi_rx_done_handler(struct urb *urb) status = 0; out: - if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num)) + if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num, GFP_ATOMIC)) rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__); if (status) dev_kfree_skb(rx_cb->rx_skb); } +static void rsi_rx_urb_kill(struct rsi_hw *adapter, u8 ep_num) +{ + struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev; + struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1]; + struct urb *urb = rx_cb->rx_urb; + + usb_kill_urb(urb); +} + /** * rsi_rx_urb_submit() - This function submits the given URB to the USB stack. * @adapter: Pointer to the adapter structure. * * Return: 0 on success, a negative error code on failure. 
*/ -static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num) +static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t mem_flags) { struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev; struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1]; @@ -328,9 +338,11 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num) rsi_rx_done_handler, rx_cb); - status = usb_submit_urb(urb, GFP_KERNEL); - if (status) + status = usb_submit_urb(urb, mem_flags); + if (status) { rsi_dbg(ERR_ZONE, "%s: Failed in urb submission\n", __func__); + dev_kfree_skb(skb); + } return status; } @@ -816,17 +828,20 @@ static int rsi_probe(struct usb_interface *pfunction, rsi_dbg(INIT_ZONE, "%s: Device Init Done\n", __func__); } - status = rsi_rx_urb_submit(adapter, WLAN_EP); + status = rsi_rx_urb_submit(adapter, WLAN_EP, GFP_KERNEL); if (status) goto err1; if (adapter->priv->coex_mode > 1) { - status = rsi_rx_urb_submit(adapter, BT_EP); + status = rsi_rx_urb_submit(adapter, BT_EP, GFP_KERNEL); if (status) - goto err1; + goto err_kill_wlan_urb; } return 0; + +err_kill_wlan_urb: + rsi_rx_urb_kill(adapter, WLAN_EP); err1: rsi_deinit_usb_interface(adapter); err: @@ -857,6 +872,10 @@ static void rsi_disconnect(struct usb_interface *pfunction) adapter->priv->bt_adapter = NULL; } + if (adapter->priv->coex_mode > 1) + rsi_rx_urb_kill(adapter, BT_EP); + rsi_rx_urb_kill(adapter, WLAN_EP); + rsi_reset_card(adapter); rsi_deinit_usb_interface(adapter); rsi_91x_deinit(adapter); diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 7b5c2fe5bd4d..8ff0374126e4 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1263,7 +1263,7 @@ static void print_id(struct usb_device *udev) static int eject_installer(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - struct usb_host_interface *iface_desc = &intf->altsetting[0]; + struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 720c89d6066e..4ac8cb262559 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -225,6 +225,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index cda996f6954e..2b83156efe3f 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, target->nfcid1_len != 10) return -EOPNOTSUPP; - return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, PN544_RF_READER_CMD_ACTIVATE_NEXT, target->nfcid1, target->nfcid1_len, NULL); } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 604dba4f18af..8e4d355dc3ae 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -565,7 +565,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void 
*_frame) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5dc32b72e7fa..a4d8c90ee7cc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -66,8 +66,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); * nvme_reset_wq - hosts nvme reset works * nvme_delete_wq - hosts nvme delete works * - * nvme_wq will host works such are scan, aen handling, fw activation, - * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq + * nvme_wq will host works such as scan, aen handling, fw activation, + * keep-alive, periodic reconnects etc. nvme_reset_wq * runs reset works which also flush works hosted on nvme_wq for * serialization purposes. nvme_delete_wq host controller deletion * works which flush reset works for serialization. @@ -976,7 +976,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) startka = true; spin_unlock_irqrestore(&ctrl->lock, flags); if (startka) - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); } static int nvme_keep_alive(struct nvme_ctrl *ctrl) @@ -1006,7 +1006,7 @@ static void nvme_keep_alive_work(struct work_struct *work) dev_dbg(ctrl->device, "reschedule traffic based keep-alive timer\n"); ctrl->comp_seen = false; - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); return; } @@ -1023,7 +1023,7 @@ static void nvme_start_keep_alive(struct nvme_ctrl *ctrl) if (unlikely(ctrl->kato == 0)) return; - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ); } void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) @@ -1165,8 +1165,8 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, unsigned int dword11, void *buffer, size_t buflen, u32 *result) { + union nvme_result res = { 0 }; struct nvme_command c; - union nvme_result res; int ret; memset(&c, 0, sizeof(c)); @@ -3867,7 +3867,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) if (!log) return; - if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log, + if (nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_FW_SLOT, 0, log, sizeof(*log), 0)) dev_warn(ctrl->device, "Get FW SLOT INFO log error\n"); kfree(log); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 797c18337d96..a11900cf3a36 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -715,6 +715,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) } INIT_WORK(&ctrl->ana_work, nvme_ana_work); + kfree(ctrl->ana_log_buf); ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL); if (!ctrl->ana_log_buf) { error = -ENOMEM; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 365a2ddbeaa7..d3f23d6254e4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -167,7 +167,6 @@ struct nvme_queue { /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; volatile struct nvme_completion *cqes; - struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; @@ -376,29 +375,17 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, WARN_ON(hctx_idx != 0); WARN_ON(dev->admin_tagset.tags[0] != hctx->tags); - WARN_ON(nvmeq->tags); hctx->driver_data = nvmeq; - nvmeq->tags = &dev->admin_tagset.tags[0]; return 0; } -static 
void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - struct nvme_queue *nvmeq = hctx->driver_data; - - nvmeq->tags = NULL; -} - static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { struct nvme_dev *dev = data; struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1]; - if (!nvmeq->tags) - nvmeq->tags = &dev->tagset.tags[hctx_idx]; - WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags); hctx->driver_data = nvmeq; return 0; @@ -948,6 +935,13 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) writel(head, nvmeq->q_db + nvmeq->dev->db_stride); } +static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) +{ + if (!nvmeq->qid) + return nvmeq->dev->admin_tagset.tags[0]; + return nvmeq->dev->tagset.tags[nvmeq->qid - 1]; +} + static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; @@ -972,7 +966,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) return; } - req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); + req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } @@ -1084,9 +1078,9 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx) spin_lock(&nvmeq->cq_poll_lock); found = nvme_process_cq(nvmeq, &start, &end, -1); + nvme_complete_cqes(nvmeq, start, end); spin_unlock(&nvmeq->cq_poll_lock); - nvme_complete_cqes(nvmeq, start, end); return found; } @@ -1407,6 +1401,23 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) nvme_poll_irqdisable(nvmeq, -1); } +/* + * Called only on a device that has been disabled and after all other threads + * that can check this device's completion queues have synced. This is the + * last chance for the driver to see a natural completion before + * nvme_cancel_request() terminates all incomplete requests. 
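+ * Entries reaped here complete normally; anything still outstanding
+ * afterwards is failed by the nvme_cancel_request() iterators in
+ * nvme_dev_disable().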
+ */ +static void nvme_reap_pending_cqes(struct nvme_dev *dev) +{ + u16 start, end; + int i; + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) { + nvme_process_cq(&dev->queues[i], &start, &end, -1); + nvme_complete_cqes(&dev->queues[i], start, end); + } +} + static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, int entry_size) { @@ -1572,7 +1583,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_pci_complete_rq, .init_hctx = nvme_admin_init_hctx, - .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_init_request, .timeout = nvme_timeout, }; @@ -2242,11 +2252,6 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) if (timeout == 0) return false; - /* handle any remaining CQEs */ - if (opcode == nvme_admin_delete_cq && - !test_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags)) - nvme_poll_irqdisable(nvmeq, -1); - sent--; if (nr_queues) goto retry; @@ -2435,6 +2440,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_io_queues(dev); nvme_suspend_queue(&dev->queues[0]); nvme_pci_disable(dev); + nvme_reap_pending_cqes(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl); @@ -2741,6 +2747,18 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") || dmi_match(DMI_BOARD_NAME, "PRIME Z370-A"))) return NVME_QUIRK_NO_APST; + } else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 || + pdev->device == 0xa808 || pdev->device == 0xa809)) || + (pdev->vendor == 0x1e0f && pdev->device == 0x0001)) { + /* + * Forcing to use host managed nvme power settings for + * lowest idle power with quick resume latency on + * Samsung and Toshiba SSDs based on suspend behavior + * on Coffee Lake board for LENOVO C640 + */ + if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) && + dmi_match(DMI_BOARD_NAME, "LNVNB161216")) + return NVME_QUIRK_SIMPLE_SUSPEND; } return 0; @@ -3103,7 +3121,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, - { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, + { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), + .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2a47c6c5007e..3e85c5cacefd 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1088,7 +1088,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl) if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) return; - queue_work(nvme_wq, &ctrl->err_work); + queue_work(nvme_reset_wq, &ctrl->err_work); } static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc, diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 6d43b23a0fc8..49d4373b84eb 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -422,7 +422,7 @@ static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) return; - queue_work(nvme_wq, &to_tcp_ctrl(ctrl)->err_work); + queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); } static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, @@ -1054,7 +1054,12 @@ static void nvme_tcp_io_work(struct 
work_struct *w) } else if (unlikely(result < 0)) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", result); - if (result != -EPIPE) + + /* + * Fail the request unless peer closed the connection, + * in which case error recovery flow will complete all. + */ + if ((result != -EPIPE) && (result != -ECONNRESET)) nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); return; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 28438b833c1b..461987f669c5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -555,7 +555,8 @@ int nvmet_ns_enable(struct nvmet_ns *ns) } else { struct nvmet_ns *old; - list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) { + list_for_each_entry_rcu(old, &subsys->namespaces, dev_link, + lockdep_is_held(&subsys->lock)) { BUG_ON(ns->nsid == old->nsid); if (ns->nsid < old->nsid) break; @@ -938,6 +939,17 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) } EXPORT_SYMBOL_GPL(nvmet_check_data_len); +bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) +{ + if (unlikely(data_len > req->transfer_len)) { + req->error_loc = offsetof(struct nvme_common_command, dptr); + nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); + return false; + } + + return true; +} + int nvmet_req_alloc_sgl(struct nvmet_req *req) { struct pci_dev *p2p_dev = NULL; @@ -1172,7 +1184,8 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl, ctrl->p2p_client = get_device(req->p2p_client); - list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link, + lockdep_is_held(&ctrl->subsys->lock)) nvmet_p2pmem_ns_add_p2p(ctrl, ns); } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index f7297473d9eb..feef15c38ec9 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -109,6 +109,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 sqsize = le16_to_cpu(c->sqsize); struct nvmet_ctrl *old; + u16 ret; old = cmpxchg(&req->sq->ctrl, NULL, ctrl); if (old) { @@ -119,7 +120,8 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (!sqsize) { pr_warn("queue size zero!\n"); req->error_loc = offsetof(struct nvmf_connect_command, sqsize); - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + ret = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + goto err; } /* note: convert queue size from 0's-based value to 1's-based value */ @@ -132,16 +134,19 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) } if (ctrl->ops->install_queue) { - u16 ret = ctrl->ops->install_queue(req->sq); - + ret = ctrl->ops->install_queue(req->sq); if (ret) { pr_err("failed to install queue %d cntlid %d ret %x\n", - qid, ret, ctrl->cntlid); - return ret; + qid, ctrl->cntlid, ret); + goto err; } } return 0; + +err: + req->sq->ctrl = NULL; + return ret; } static void nvmet_execute_admin_connect(struct nvmet_req *req) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index b6fca0e421ef..ea0e596be15d 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -280,7 +280,7 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req) static void nvmet_bdev_execute_dsm(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + if (!nvmet_check_data_len_lte(req, 
nvmet_dsm_len(req))) return; switch (le32_to_cpu(req->cmd->dsm.attributes)) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index caebfce06605..cd5670b83118 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -336,7 +336,7 @@ static void nvmet_file_dsm_work(struct work_struct *w) static void nvmet_file_execute_dsm(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 46df45e837c9..eda28b22a2c8 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -374,6 +374,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); void nvmet_req_free_sgl(struct nvmet_req *req); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index af674fc0bb1e..5bb5342b8d0c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -515,7 +515,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) return 1; } -static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) +static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; int ret; @@ -523,9 +523,15 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) while (cmd->cur_sg) { struct page *page = sg_page(cmd->cur_sg); u32 left = cmd->cur_sg->length - cmd->offset; + int flags = MSG_DONTWAIT; + + if ((!last_in_batch && cmd->queue->send_list_len) || + cmd->wbytes_done + left < cmd->req.transfer_len || + queue->data_digest || !queue->nvme_sq.sqhd_disabled) + flags |= MSG_MORE; ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + left, flags); if (ret <= 0) return ret; @@ -660,7 +666,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, } if (cmd->state == NVMET_TCP_SEND_DATA) { - ret = nvmet_try_send_data(cmd); + ret = nvmet_try_send_data(cmd, last_in_batch); if (ret <= 0) goto done_send; } diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 9f1ee9c766ec..1e4a798dce6e 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -83,7 +83,7 @@ static void nvmem_cell_drop(struct nvmem_cell *cell) list_del(&cell->node); mutex_unlock(&nvmem_mutex); of_node_put(cell->np); - kfree(cell->name); + kfree_const(cell->name); kfree(cell); } @@ -110,7 +110,9 @@ static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem, cell->nvmem = nvmem; cell->offset = info->offset; cell->bytes = info->bytes; - cell->name = info->name; + cell->name = kstrdup_const(info->name, GFP_KERNEL); + if (!cell->name) + return -ENOMEM; cell->bit_offset = info->bit_offset; cell->nbits = info->nbits; @@ -300,7 +302,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) dev_err(dev, "cell %s unaligned to nvmem stride %d\n", cell->name, nvmem->stride); /* Cells already added will be freed later. 
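 * kfree_const() matches the kstrdup_const() now used when the cell name
 * is set, and leaves names that point at constant strings untouched.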
*/ - kfree(cell->name); + kfree_const(cell->name); kfree(cell); return -EINVAL; } diff --git a/drivers/nvmem/imx-ocotp-scu.c b/drivers/nvmem/imx-ocotp-scu.c index 03f1ab23ad51..455675dd8efe 100644 --- a/drivers/nvmem/imx-ocotp-scu.c +++ b/drivers/nvmem/imx-ocotp-scu.c @@ -15,8 +15,7 @@ #include #include -#define IMX_SIP_OTP 0xC200000A -#define IMX_SIP_OTP_WRITE 0x2 +#define IMX_SIP_OTP_WRITE 0xc200000B enum ocotp_devtype { IMX8QXP, @@ -212,8 +211,7 @@ static int imx_scu_ocotp_write(void *context, unsigned int offset, mutex_lock(&scu_ocotp_mutex); - arm_smccc_smc(IMX_SIP_OTP, IMX_SIP_OTP_WRITE, index, *buf, - 0, 0, 0, 0, &res); + arm_smccc_smc(IMX_SIP_OTP_WRITE, index, *buf, 0, 0, 0, 0, 0, &res); mutex_unlock(&scu_ocotp_mutex); diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 37c2ccbefecd..d91618641be6 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -103,4 +103,8 @@ config OF_OVERLAY config OF_NUMA bool +config OF_DMA_DEFAULT_COHERENT + # arches should select this if DMA is coherent by default for OF devices + bool + endif # OF diff --git a/drivers/of/address.c b/drivers/of/address.c index 99c1b8058559..e8a39c3ec4d4 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -995,12 +995,16 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz * @np: device node * * It returns true if "dma-coherent" property was found - * for this device in DT. + * for this device in the DT, or if DMA is coherent by + * default for OF devices on the current platform. */ bool of_dma_is_coherent(struct device_node *np) { struct device_node *node = of_node_get(np); + if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) + return true; + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 1cbb58240b80..1e5fcdee043c 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -678,15 +678,17 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) dev_err(dev, "%s: Failed to add OPP, %d\n", __func__, ret); of_node_put(np); - return ret; + goto put_list_kref; } else if (opp) { count++; } } /* There should be one of more OPP defined */ - if (WARN_ON(!count)) - return -ENOENT; + if (WARN_ON(!count)) { + ret = -ENOENT; + goto put_list_kref; + } list_for_each_entry(opp, &opp_table->opp_list, node) pstate_count += !!opp->pstate; @@ -695,7 +697,8 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) if (pstate_count && pstate_count != count) { dev_err(dev, "Not all nodes have performance state set (%d: %d)\n", count, pstate_count); - return -ENOENT; + ret = -ENOENT; + goto put_list_kref; } if (pstate_count) @@ -704,6 +707,11 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) opp_table->parsed_static_opps = true; return 0; + +put_list_kref: + _put_opp_list_kref(opp_table); + + return ret; } /* Initializes OPP tables based on old-deprecated bindings */ @@ -738,6 +746,7 @@ static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table) if (ret) { dev_err(dev, "%s: Failed to add OPP %ld (%d)\n", __func__, freq, ret); + _put_opp_list_kref(opp_table); return ret; } nr -= 2; diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 982b46f0a54d..3ef0bb281e7c 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -69,6 +69,7 @@ int pci_enable_ats(struct pci_dev *dev, int ps) dev->ats_enabled = 1; return 0; } +EXPORT_SYMBOL_GPL(pci_enable_ats); /** * pci_disable_ats - disable the ATS 
capability @@ -87,6 +88,7 @@ void pci_disable_ats(struct pci_dev *dev) dev->ats_enabled = 0; } +EXPORT_SYMBOL_GPL(pci_disable_ats); void pci_restore_ats_state(struct pci_dev *dev) { @@ -424,11 +426,12 @@ void pci_restore_pasid_state(struct pci_dev *pdev) int pci_pasid_features(struct pci_dev *pdev) { u16 supported; - int pasid = pdev->pasid_cap; + int pasid; if (pdev->is_virtfn) pdev = pci_physfn(pdev); + pasid = pdev->pasid_cap; if (!pasid) return -EINVAL; @@ -451,11 +454,12 @@ int pci_pasid_features(struct pci_dev *pdev) int pci_max_pasids(struct pci_dev *pdev) { u16 supported; - int pasid = pdev->pasid_cap; + int pasid; if (pdev->is_virtfn) pdev = pci_physfn(pdev); + pasid = pdev->pasid_cap; if (!pasid) return -EINVAL; diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index af677254a072..c8c702c494a2 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -422,7 +422,7 @@ static void ks_pcie_setup_rc_app_regs(struct keystone_pcie *ks_pcie) lower_32_bits(start) | OB_ENABLEN); ks_pcie_app_writel(ks_pcie, OB_OFFSET_HI(i), upper_32_bits(start)); - start += OB_WIN_SIZE; + start += OB_WIN_SIZE * SZ_1M; } val = ks_pcie_app_readl(ks_pcie, CMD_STATUS); @@ -510,7 +510,7 @@ static void ks_pcie_stop_link(struct dw_pcie *pci) /* Disable Link training */ val = ks_pcie_app_readl(ks_pcie, CMD_STATUS); val &= ~LTSSM_EN_VAL; - ks_pcie_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); + ks_pcie_app_writel(ks_pcie, CMD_STATUS, val); } static int ks_pcie_start_link(struct dw_pcie *pci) @@ -1354,7 +1354,7 @@ static int __init ks_pcie_probe(struct platform_device *pdev) ret = of_property_read_u32(np, "num-viewport", &num_viewport); if (ret < 0) { dev_err(dev, "unable to read *num-viewport* property\n"); - return ret; + goto err_get_sync; } /* diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index 673a1725ef38..ac93f5a0398e 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -2499,7 +2499,6 @@ static const struct tegra_pcie_soc tegra20_pcie = { .num_ports = 2, .ports = tegra20_pcie_ports, .msi_base_shift = 0, - .afi_pex2_ctrl = 0x128, .pads_pll_ctl = PADS_PLL_CTL_TEGRA20, .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_DIV10, .pads_refclk_cfg0 = 0xfa5cfa5c, @@ -2528,6 +2527,7 @@ static const struct tegra_pcie_soc tegra30_pcie = { .num_ports = 3, .ports = tegra30_pcie_ports, .msi_base_shift = 8, + .afi_pex2_ctrl = 0x128, .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, .pads_refclk_cfg0 = 0xfa5cfa5c, @@ -2798,7 +2798,7 @@ static int tegra_pcie_probe(struct platform_device *pdev) pm_runtime_enable(pcie->dev); err = pm_runtime_get_sync(pcie->dev); - if (err) { + if (err < 0) { dev_err(dev, "fail to enable pcie controller: %d\n", err); goto teardown_msi; } diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 0a468c73bae3..8c7f875acf7f 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -1588,6 +1588,30 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd802, DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd804, quirk_paxc_disable_msi_parsing); +static void quirk_paxc_bridge(struct pci_dev *pdev) +{ + /* + * The PCI config space is shared with the PAXC root port and the first + * Ethernet device. So, we need to workaround this by telling the PCI + * code that the bridge is not an Ethernet device. 
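+ * Forcing the class code to PCI_CLASS_BRIDGE_PCI below keeps the core
+ * from matching an Ethernet driver against the bridge function.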
+ */ + if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + pdev->class = PCI_CLASS_BRIDGE_PCI << 8; + + /* + * MPSS is not being set properly (as it is currently 0). This is + * because that area of the PCI config space is hard coded to zero, and + * is not modifiable by firmware. Set this to 2 (e.g., 512 byte MPS) + * so that the MPS can be set to the real max value. + */ + pdev->pcie_mpss = 2; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16cd, quirk_paxc_bridge); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16f0, quirk_paxc_bridge); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd750, quirk_paxc_bridge); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd802, quirk_paxc_bridge); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd804, quirk_paxc_bridge); + MODULE_AUTHOR("Ray Jui "); MODULE_DESCRIPTION("Broadcom iPROC PCIe common driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 1e88fd427757..4d1f392b05f9 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -186,10 +186,10 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id) sprintf(buf, "virtfn%u", id); rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); if (rc) - goto failed2; + goto failed1; rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); if (rc) - goto failed3; + goto failed2; kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); @@ -197,11 +197,10 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id) return 0; -failed3: - sysfs_remove_link(&dev->dev.kobj, buf); failed2: - pci_stop_and_remove_bus_device(virtfn); + sysfs_remove_link(&dev->dev.kobj, buf); failed1: + pci_stop_and_remove_bus_device(virtfn); pci_dev_put(dev); failed0: virtfn_remove_bus(dev->bus, bus); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e87196cc1a7f..951099279192 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5998,7 +5998,8 @@ EXPORT_SYMBOL_GPL(pci_pr3_present); /** * pci_add_dma_alias - Add a DMA devfn alias for a device * @dev: the PCI device for which alias is added - * @devfn: alias slot and function + * @devfn_from: alias slot and function + * @nr_devfns: number of subsequent devfns to alias * * This helper encodes an 8-bit devfn as a bit number in dma_alias_mask * which is used to program permissible bus-devfn source addresses for DMA @@ -6014,18 +6015,29 @@ EXPORT_SYMBOL_GPL(pci_pr3_present); * cannot be left as a userspace activity). DMA aliases should therefore * be configured via quirks, such as the PCI fixup header quirk. 
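 * The alias range is clamped so it never extends past devfn 1f.7; see
 * the min() against MAX_NR_DEVFNS below.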
*/ -void pci_add_dma_alias(struct pci_dev *dev, u8 devfn) +void pci_add_dma_alias(struct pci_dev *dev, u8 devfn_from, unsigned nr_devfns) { + int devfn_to; + + nr_devfns = min(nr_devfns, (unsigned) MAX_NR_DEVFNS - devfn_from); + devfn_to = devfn_from + nr_devfns - 1; + if (!dev->dma_alias_mask) - dev->dma_alias_mask = bitmap_zalloc(U8_MAX, GFP_KERNEL); + dev->dma_alias_mask = bitmap_zalloc(MAX_NR_DEVFNS, GFP_KERNEL); if (!dev->dma_alias_mask) { pci_warn(dev, "Unable to allocate DMA alias mask\n"); return; } - set_bit(devfn, dev->dma_alias_mask); - pci_info(dev, "Enabling fixed DMA alias to %02x.%d\n", - PCI_SLOT(devfn), PCI_FUNC(devfn)); + bitmap_set(dev->dma_alias_mask, devfn_from, nr_devfns); + + if (nr_devfns == 1) + pci_info(dev, "Enabling fixed DMA alias to %02x.%d\n", + PCI_SLOT(devfn_from), PCI_FUNC(devfn_from)); + else if (nr_devfns > 1) + pci_info(dev, "Enabling fixed DMA alias for devfn range from %02x.%d to %02x.%d\n", + PCI_SLOT(devfn_from), PCI_FUNC(devfn_from), + PCI_SLOT(devfn_to), PCI_FUNC(devfn_to)); } bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index a0a53bd05a0b..6394e7746fb5 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -4,6 +4,9 @@ #include +/* Number of possible devfns: 0.0 to 1f.7 inclusive */ +#define MAX_NR_DEVFNS 256 + #define PCI_FIND_CAP_TTL 48 #define PCI_VSEC_ID_INTEL_TBT 0x1234 /* Thunderbolt */ diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 1ca86f2e0166..4a818b07a1af 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1445,6 +1445,7 @@ static int aer_probe(struct pcie_device *dev) return -ENOMEM; rpc->rpd = port; + INIT_KFIFO(rpc->aer_fifo); set_service_data(dev, rpc); status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr, diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index fbeb9f73ef28..83953752337c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1871,19 +1871,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x2609, quirk_intel_pcie_pm); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); +static void quirk_d3hot_delay(struct pci_dev *dev, unsigned int delay) +{ + if (dev->d3_delay >= delay) + return; + + dev->d3_delay = delay; + pci_info(dev, "extending delay after power-on from D3hot to %d msec\n", + dev->d3_delay); +} + static void quirk_radeon_pm(struct pci_dev *dev) { if (dev->subsystem_vendor == PCI_VENDOR_ID_APPLE && - dev->subsystem_device == 0x00e2) { - if (dev->d3_delay < 20) { - dev->d3_delay = 20; - pci_info(dev, "extending delay after power-on from D3 to %d msec\n", - dev->d3_delay); - } - } + dev->subsystem_device == 0x00e2) + quirk_d3hot_delay(dev, 20); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6741, quirk_radeon_pm); +/* + * Ryzen5/7 XHCI controllers fail upon resume from runtime suspend or s2idle. + * https://bugzilla.kernel.org/show_bug.cgi?id=205587 + * + * The kernel attempts to transition these devices to D3cold, but that seems + * to be ineffective on the platforms in question; the PCI device appears to + * remain on in D3hot state. The D3hot-to-D0 transition then requires an + * extended delay in order to succeed. 
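+ * The 20 msec value matches the delay already used by the Radeon quirk
+ * above.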
+ */ +static void quirk_ryzen_xhci_d3hot(struct pci_dev *dev) +{ + quirk_d3hot_delay(dev, 20); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e0, quirk_ryzen_xhci_d3hot); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e1, quirk_ryzen_xhci_d3hot); + #ifdef CONFIG_X86_IO_APIC static int dmi_disable_ioapicreroute(const struct dmi_system_id *d) { @@ -2381,32 +2402,6 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5719, quirk_brcm_5719_limit_mrrs); -#ifdef CONFIG_PCIE_IPROC_PLATFORM -static void quirk_paxc_bridge(struct pci_dev *pdev) -{ - /* - * The PCI config space is shared with the PAXC root port and the first - * Ethernet device. So, we need to workaround this by telling the PCI - * code that the bridge is not an Ethernet device. - */ - if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - pdev->class = PCI_CLASS_BRIDGE_PCI << 8; - - /* - * MPSS is not being set properly (as it is currently 0). This is - * because that area of the PCI config space is hard coded to zero, and - * is not modifiable by firmware. Set this to 2 (e.g., 512 byte MPS) - * so that the MPS can be set to the real max value. - */ - pdev->pcie_mpss = 2; -} -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16cd, quirk_paxc_bridge); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0x16f0, quirk_paxc_bridge); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd750, quirk_paxc_bridge); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd802, quirk_paxc_bridge); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_BROADCOM, 0xd804, quirk_paxc_bridge); -#endif - /* * Originally in EDAC sources for i82875P: Intel tells BIOS developers to * hide device 6 which configures the overflow device access containing the @@ -3932,7 +3927,7 @@ int pci_dev_specific_reset(struct pci_dev *dev, int probe) static void quirk_dma_func0_alias(struct pci_dev *dev) { if (PCI_FUNC(dev->devfn) != 0) - pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0)); + pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 0), 1); } /* @@ -3946,7 +3941,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_RICOH, 0xe476, quirk_dma_func0_alias); static void quirk_dma_func1_alias(struct pci_dev *dev) { if (PCI_FUNC(dev->devfn) != 1) - pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 1)); + pci_add_dma_alias(dev, PCI_DEVFN(PCI_SLOT(dev->devfn), 1), 1); } /* @@ -4031,7 +4026,7 @@ static void quirk_fixed_dma_alias(struct pci_dev *dev) id = pci_match_id(fixed_dma_alias_tbl, dev); if (id) - pci_add_dma_alias(dev, id->driver_data); + pci_add_dma_alias(dev, id->driver_data, 1); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ADAPTEC2, 0x0285, quirk_fixed_dma_alias); @@ -4072,9 +4067,9 @@ DECLARE_PCI_FIXUP_HEADER(0x8086, 0x244e, quirk_use_pcie_bridge_dma_alias); */ static void quirk_mic_x200_dma_alias(struct pci_dev *pdev) { - pci_add_dma_alias(pdev, PCI_DEVFN(0x10, 0x0)); - pci_add_dma_alias(pdev, PCI_DEVFN(0x11, 0x0)); - pci_add_dma_alias(pdev, PCI_DEVFN(0x12, 0x3)); + pci_add_dma_alias(pdev, PCI_DEVFN(0x10, 0x0), 1); + pci_add_dma_alias(pdev, PCI_DEVFN(0x11, 0x0), 1); + pci_add_dma_alias(pdev, PCI_DEVFN(0x12, 0x3), 1); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2260, quirk_mic_x200_dma_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias); @@ -4098,13 +4093,8 @@ static void quirk_pex_vca_alias(struct pci_dev *pdev) const unsigned int num_pci_slots = 0x20; unsigned int slot; - for (slot = 0; slot < num_pci_slots; slot++) { - pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x0)); - pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x1)); - 
pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x2)); - pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x3)); - pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x4)); - } + for (slot = 0; slot < num_pci_slots; slot++) + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x0), 5); } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2954, quirk_pex_vca_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2955, quirk_pex_vca_alias); @@ -5339,7 +5329,7 @@ static void quirk_switchtec_ntb_dma_alias(struct pci_dev *pdev) pci_dbg(pdev, "Aliasing Partition %d Proxy ID %02x.%d\n", pp, PCI_SLOT(devfn), PCI_FUNC(devfn)); - pci_add_dma_alias(pdev, devfn); + pci_add_dma_alias(pdev, devfn, 1); } } @@ -5381,6 +5371,21 @@ SWITCHTEC_QUIRK(0x8574); /* PFXI 64XG3 */ SWITCHTEC_QUIRK(0x8575); /* PFXI 80XG3 */ SWITCHTEC_QUIRK(0x8576); /* PFXI 96XG3 */ +/* + * The PLX NTB uses devfn proxy IDs to move TLPs between NT endpoints. + * These IDs are used to forward responses to the originator on the other + * side of the NTB. Alias all possible IDs to the NTB to permit access when + * the IOMMU is turned on. + */ +static void quirk_plx_ntb_dma_alias(struct pci_dev *pdev) +{ + pci_info(pdev, "Setting PLX NTB proxy ID aliases\n"); + /* PLX NTB may use all 256 devfns */ + pci_add_dma_alias(pdev, 0, 256); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PLX, 0x87b0, quirk_plx_ntb_dma_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PLX, 0x87b1, quirk_plx_ntb_dma_alias); + /* * On Lenovo Thinkpad P50 SKUs with a Nvidia Quadro M1000M, the BIOS does * not always reset the secondary Nvidia GPU between reboots if the system diff --git a/drivers/pci/search.c b/drivers/pci/search.c index bade14002fd8..e4dbdef5aef0 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -41,9 +41,9 @@ int pci_for_each_dma_alias(struct pci_dev *pdev, * DMA, iterate over that too. 
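 * Note devfn must be wider than u8: for_each_set_bit() terminates by
 * returning the bitmap size (MAX_NR_DEVFNS, i.e. 256), and that sentinel
 * would wrap to 0 in a u8 and spin this loop forever.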
*/ if (unlikely(pdev->dma_alias_mask)) { - u8 devfn; + unsigned int devfn; - for_each_set_bit(devfn, pdev->dma_alias_mask, U8_MAX) { + for_each_set_bit(devfn, pdev->dma_alias_mask, MAX_NR_DEVFNS) { ret = fn(pdev, PCI_DEVID(pdev->bus->number, devfn), data); if (ret) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index f279826204eb..591161ce0f51 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1803,12 +1803,18 @@ void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus) /* Restore size and flags */ list_for_each_entry(fail_res, &fail_head, list) { struct resource *res = fail_res->res; + int idx; res->start = fail_res->start; res->end = fail_res->end; res->flags = fail_res->flags; - if (fail_res->dev->subordinate) - res->flags = 0; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } } free_list(&fail_head); @@ -2055,12 +2061,18 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) /* Restore size and flags */ list_for_each_entry(fail_res, &fail_head, list) { struct resource *res = fail_res->res; + int idx; res->start = fail_res->start; res->end = fail_res->end; res->flags = fail_res->flags; - if (fail_res->dev->subordinate) - res->flags = 0; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } } free_list(&fail_head); diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 88091bbfe77f..9c3ad09d3022 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -1276,7 +1276,7 @@ static int switchtec_init_isr(struct switchtec_dev *stdev) if (nvecs < 0) return nvecs; - event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number); + event_irq = ioread16(&stdev->mmio_part_cfg->vep_vector_number); if (event_irq < 0 || event_irq >= nvecs) return -EFAULT; @@ -1349,7 +1349,7 @@ static int switchtec_init_pci(struct switchtec_dev *stdev, if (rc) return rc; - rc = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rc) return rc; diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index acce8781c456..f5c7a845cd7b 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -24,8 +24,6 @@ static int arm_pmu_acpi_register_irq(int cpu) int gsi, trigger; gicc = acpi_cpu_get_madt_gicc(cpu); - if (WARN_ON(!gicc)) - return -EINVAL; gsi = gicc->performance_interrupt; @@ -64,11 +62,10 @@ static void arm_pmu_acpi_unregister_irq(int cpu) int gsi; gicc = acpi_cpu_get_madt_gicc(cpu); - if (!gicc) - return; gsi = gicc->performance_interrupt; - acpi_unregister_gsi(gsi); + if (gsi) + acpi_unregister_gsi(gsi); } #if IS_ENABLED(CONFIG_ARM_SPE_PMU) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index d704eccc548f..f01a57e5a5f3 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -771,7 +771,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) smmu_pmu->reloc_base = smmu_pmu->reg_base; } - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq_optional(pdev, 0); if (irq > 0) smmu_pmu->irq = irq; diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 55083c67b2bb..90884d14f95f 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ 
-388,9 +388,10 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config, if (enable) { /* - * must disable first, then enable again - * otherwise, cycle counter will not work - * if previous state is enabled. + * cycle counter is special which should firstly write 0 then + * write 1 into CLEAR bit to clear it. Other counters only + * need write 0 into CLEAR bit and it turns out to be 1 by + * hardware. Below enable flow is harmless for all counters. */ writel(0, pmu->base + reg); val = CNTL_EN | CNTL_CLEAR; @@ -398,7 +399,8 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config, writel(val, pmu->base + reg); } else { /* Disable counter */ - writel(0, pmu->base + reg); + val = readl_relaxed(pmu->base + reg) & CNTL_EN_MASK; + writel(val, pmu->base + reg); } } @@ -633,13 +635,17 @@ static int ddr_perf_probe(struct platform_device *pdev) if (ret < 0) { dev_err(&pdev->dev, "cpuhp_setup_state_multi failed\n"); - goto ddr_perf_err; + goto cpuhp_state_err; } pmu->cpuhp_state = ret; /* Register the pmu instance for cpu hotplug */ - cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node); + ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + goto cpuhp_instance_err; + } /* Request irq */ irq = of_irq_get(np, 0); @@ -673,9 +679,10 @@ static int ddr_perf_probe(struct platform_device *pdev) return 0; ddr_perf_err: - if (pmu->cpuhp_state) - cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); - + cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); +cpuhp_instance_err: + cpuhp_remove_multi_state(pmu->cpuhp_state); +cpuhp_state_err: ida_simple_remove(&ddr_ida, pmu->id); dev_warn(&pdev->dev, "i.MX8 DDR Perf PMU failed (%d), disabled\n", ret); return ret; @@ -686,6 +693,7 @@ static int ddr_perf_remove(struct platform_device *pdev) struct ddr_pmu *pmu = platform_get_drvdata(pdev); cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node); + cpuhp_remove_multi_state(pmu->cpuhp_state); irq_set_affinity_hint(pmu->irq, NULL); perf_pmu_unregister(&pmu->pmu); diff --git a/drivers/phy/allwinner/phy-sun50i-usb3.c b/drivers/phy/allwinner/phy-sun50i-usb3.c index 1169f3e83a6f..b1c04f71a31d 100644 --- a/drivers/phy/allwinner/phy-sun50i-usb3.c +++ b/drivers/phy/allwinner/phy-sun50i-usb3.c @@ -49,7 +49,7 @@ #define SUNXI_LOS_BIAS(n) ((n) << 3) #define SUNXI_LOS_BIAS_MASK GENMASK(5, 3) #define SUNXI_TXVBOOSTLVL(n) ((n) << 0) -#define SUNXI_TXVBOOSTLVL_MASK GENMASK(0, 2) +#define SUNXI_TXVBOOSTLVL_MASK GENMASK(2, 0) struct sun50i_usb3_phy { struct phy *phy; diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c index f20524f0c21d..94a34cf75eb3 100644 --- a/drivers/phy/motorola/phy-mapphone-mdm6600.c +++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c @@ -20,6 +20,7 @@ #define PHY_MDM6600_PHY_DELAY_MS 4000 /* PHY enable 2.2s to 3.5s */ #define PHY_MDM6600_ENABLED_DELAY_MS 8000 /* 8s more total for MDM6600 */ +#define PHY_MDM6600_WAKE_KICK_MS 600 /* time on after GPIO toggle */ #define MDM6600_MODEM_IDLE_DELAY_MS 1000 /* modem after USB suspend */ #define MDM6600_MODEM_WAKE_DELAY_MS 200 /* modem response after idle */ @@ -243,10 +244,24 @@ static irqreturn_t phy_mdm6600_wakeirq_thread(int irq, void *data) { struct phy_mdm6600 *ddata = data; struct gpio_desc *mode_gpio1; + int error, wakeup; mode_gpio1 = ddata->mode_gpios->desc[PHY_MDM6600_MODE1]; - dev_dbg(ddata->dev, "OOB wake on mode_gpio1: %i\n", - 
gpiod_get_value(mode_gpio1)); + wakeup = gpiod_get_value(mode_gpio1); + if (!wakeup) + return IRQ_NONE; + + dev_dbg(ddata->dev, "OOB wake on mode_gpio1: %i\n", wakeup); + error = pm_runtime_get_sync(ddata->dev); + if (error < 0) { + pm_runtime_put_noidle(ddata->dev); + + return IRQ_NONE; + } + + /* Just wake-up and kick the autosuspend timer */ + pm_runtime_mark_last_busy(ddata->dev); + pm_runtime_put_autosuspend(ddata->dev); return IRQ_HANDLED; } @@ -496,8 +511,14 @@ static void phy_mdm6600_modem_wake(struct work_struct *work) ddata = container_of(work, struct phy_mdm6600, modem_wake_work.work); phy_mdm6600_wake_modem(ddata); + + /* + * The modem does not always stay awake 1.2 seconds after toggling + * the wake GPIO, and sometimes it idles after about some 600 ms + * making writes time out. + */ schedule_delayed_work(&ddata->modem_wake_work, - msecs_to_jiffies(MDM6600_MODEM_IDLE_DELAY_MS)); + msecs_to_jiffies(PHY_MDM6600_WAKE_KICK_MS)); } static int __maybe_unused phy_mdm6600_runtime_suspend(struct device *dev) diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 42bc5150dd92..febe0aef68d4 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -80,7 +80,7 @@ static int read_poll_timeout(void __iomem *addr, u32 mask) if (readl_relaxed(addr) & mask) return 0; - usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); + usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); } while (!time_after(jiffies, timeout)); return (readl_relaxed(addr) & mask) ? 0 : -ETIMEDOUT; diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index a28bd15297f5..1c536fc03c83 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -80,20 +80,20 @@ static int phy_gmii_sel_mode(struct phy *phy, enum phy_mode mode, int submode) break; case PHY_INTERFACE_MODE_MII: - mode = AM33XX_GMII_SEL_MODE_MII; + case PHY_INTERFACE_MODE_GMII: + gmii_sel_mode = AM33XX_GMII_SEL_MODE_MII; break; default: - dev_warn(dev, - "port%u: unsupported mode: \"%s\". 
Defaulting to MII.\n", - if_phy->id, phy_modes(rgmii_id)); + dev_warn(dev, "port%u: unsupported mode: \"%s\"\n", + if_phy->id, phy_modes(submode)); return -EINVAL; } if_phy->phy_if_mode = submode; dev_dbg(dev, "%s id:%u mode:%u rgmii_id:%d rmii_clk_ext:%d\n", - __func__, if_phy->id, mode, rgmii_id, + __func__, if_phy->id, submode, rgmii_id, if_phy->rmii_clock_external); regfield = if_phy->fields[PHY_GMII_SEL_PORT_MODE]; diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 2bbd8ee93507..6381745e3bb1 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2025,7 +2025,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) return PTR_ERR(pctldev->p); } - kref_get(&pctldev->p->users); pctldev->hog_default = pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); if (IS_ERR(pctldev->hog_default)) { diff --git a/drivers/pinctrl/freescale/pinctrl-scu.c b/drivers/pinctrl/freescale/pinctrl-scu.c index 73bf1d9f9cc6..23cf04bdfc55 100644 --- a/drivers/pinctrl/freescale/pinctrl-scu.c +++ b/drivers/pinctrl/freescale/pinctrl-scu.c @@ -23,12 +23,12 @@ struct imx_sc_msg_req_pad_set { struct imx_sc_rpc_msg hdr; u32 val; u16 pad; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_req_pad_get { struct imx_sc_rpc_msg hdr; u16 pad; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_resp_pad_get { struct imx_sc_rpc_msg hdr; diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index 55141d5de29e..ce9cf50121bd 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -107,6 +107,7 @@ struct byt_gpio_pin_context { struct byt_gpio { struct gpio_chip chip; + struct irq_chip irqchip; struct platform_device *pdev; struct pinctrl_dev *pctl_dev; struct pinctrl_desc pctl_desc; @@ -752,7 +753,13 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset) raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */ + if (value & BYT_DIRECT_IRQ_EN) + /* nothing to do */ ; + else + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + writel(value, reg); raw_spin_unlock_irqrestore(&byt_lock, flags); } @@ -1395,15 +1402,6 @@ static int byt_irq_type(struct irq_data *d, unsigned int type) return 0; } -static struct irq_chip byt_irqchip = { - .name = "BYT-GPIO", - .irq_ack = byt_irq_ack, - .irq_mask = byt_irq_mask, - .irq_unmask = byt_irq_unmask, - .irq_set_type = byt_irq_type, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - static void byt_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); @@ -1551,8 +1549,15 @@ static int byt_gpio_probe(struct byt_gpio *vg) if (irq_rc && irq_rc->start) { struct gpio_irq_chip *girq; + vg->irqchip.name = "BYT-GPIO", + vg->irqchip.irq_ack = byt_irq_ack, + vg->irqchip.irq_mask = byt_irq_mask, + vg->irqchip.irq_unmask = byt_irq_unmask, + vg->irqchip.irq_set_type = byt_irq_type, + vg->irqchip.flags = IRQCHIP_SKIP_SET_WAKE, + girq = &gc->irq; - girq->chip = &byt_irqchip; + girq->chip = &vg->irqchip; girq->init_hw = byt_gpio_irq_init_hw; girq->parent_handler = byt_gpio_irq_handler; girq->num_parents = 1; diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c index 58572b15b3ce..08a86f6fdea6 100644 --- a/drivers/pinctrl/intel/pinctrl-tigerlake.c +++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c @@ -2,7 +2,7 @@ /* * Intel Tiger Lake PCH 
pinctrl/GPIO driver * - * Copyright (C) 2019, Intel Corporation + * Copyright (C) 2019 - 2020, Intel Corporation * Authors: Andy Shevchenko * Mika Westerberg */ @@ -21,15 +21,19 @@ #define TGL_GPI_IS 0x100 #define TGL_GPI_IE 0x120 -#define TGL_GPP(r, s, e) \ +#define TGL_NO_GPIO -1 + +#define TGL_GPP(r, s, e, g) \ { \ .reg_num = (r), \ .base = (s), \ .size = ((e) - (s) + 1), \ + .gpio_base = (g), \ } -#define TGL_COMMUNITY(s, e, g) \ +#define TGL_COMMUNITY(b, s, e, g) \ { \ + .barno = (b), \ .padown_offset = TGL_PAD_OWN, \ .padcfglock_offset = TGL_PADCFGLOCK, \ .hostown_offset = TGL_HOSTSW_OWN, \ @@ -42,7 +46,7 @@ } /* Tiger Lake-LP */ -static const struct pinctrl_pin_desc tgllp_community0_pins[] = { +static const struct pinctrl_pin_desc tgllp_pins[] = { /* GPP_B */ PINCTRL_PIN(0, "CORE_VID_0"), PINCTRL_PIN(1, "CORE_VID_1"), @@ -113,324 +117,273 @@ static const struct pinctrl_pin_desc tgllp_community0_pins[] = { PINCTRL_PIN(64, "GPPC_A_22"), PINCTRL_PIN(65, "I2S1_SCLK"), PINCTRL_PIN(66, "ESPI_CLK_LOOPBK"), -}; - -static const struct intel_padgroup tgllp_community0_gpps[] = { - TGL_GPP(0, 0, 25), /* GPP_B */ - TGL_GPP(1, 26, 41), /* GPP_T */ - TGL_GPP(2, 42, 66), /* GPP_A */ -}; - -static const struct intel_community tgllp_community0[] = { - TGL_COMMUNITY(0, 66, tgllp_community0_gpps), -}; - -static const struct intel_pinctrl_soc_data tgllp_community0_soc_data = { - .uid = "0", - .pins = tgllp_community0_pins, - .npins = ARRAY_SIZE(tgllp_community0_pins), - .communities = tgllp_community0, - .ncommunities = ARRAY_SIZE(tgllp_community0), -}; - -static const struct pinctrl_pin_desc tgllp_community1_pins[] = { /* GPP_S */ - PINCTRL_PIN(0, "SNDW0_CLK"), - PINCTRL_PIN(1, "SNDW0_DATA"), - PINCTRL_PIN(2, "SNDW1_CLK"), - PINCTRL_PIN(3, "SNDW1_DATA"), - PINCTRL_PIN(4, "SNDW2_CLK"), - PINCTRL_PIN(5, "SNDW2_DATA"), - PINCTRL_PIN(6, "SNDW3_CLK"), - PINCTRL_PIN(7, "SNDW3_DATA"), + PINCTRL_PIN(67, "SNDW0_CLK"), + PINCTRL_PIN(68, "SNDW0_DATA"), + PINCTRL_PIN(69, "SNDW1_CLK"), + PINCTRL_PIN(70, "SNDW1_DATA"), + PINCTRL_PIN(71, "SNDW2_CLK"), + PINCTRL_PIN(72, "SNDW2_DATA"), + PINCTRL_PIN(73, "SNDW3_CLK"), + PINCTRL_PIN(74, "SNDW3_DATA"), /* GPP_H */ - PINCTRL_PIN(8, "GPPC_H_0"), - PINCTRL_PIN(9, "GPPC_H_1"), - PINCTRL_PIN(10, "GPPC_H_2"), - PINCTRL_PIN(11, "SX_EXIT_HOLDOFFB"), - PINCTRL_PIN(12, "I2C2_SDA"), - PINCTRL_PIN(13, "I2C2_SCL"), - PINCTRL_PIN(14, "I2C3_SDA"), - PINCTRL_PIN(15, "I2C3_SCL"), - PINCTRL_PIN(16, "I2C4_SDA"), - PINCTRL_PIN(17, "I2C4_SCL"), - PINCTRL_PIN(18, "SRCCLKREQB_4"), - PINCTRL_PIN(19, "SRCCLKREQB_5"), - PINCTRL_PIN(20, "M2_SKT2_CFG_0"), - PINCTRL_PIN(21, "M2_SKT2_CFG_1"), - PINCTRL_PIN(22, "M2_SKT2_CFG_2"), - PINCTRL_PIN(23, "M2_SKT2_CFG_3"), - PINCTRL_PIN(24, "DDPB_CTRLCLK"), - PINCTRL_PIN(25, "DDPB_CTRLDATA"), - PINCTRL_PIN(26, "CPU_C10_GATEB"), - PINCTRL_PIN(27, "TIME_SYNC_0"), - PINCTRL_PIN(28, "IMGCLKOUT_1"), - PINCTRL_PIN(29, "IMGCLKOUT_2"), - PINCTRL_PIN(30, "IMGCLKOUT_3"), - PINCTRL_PIN(31, "IMGCLKOUT_4"), + PINCTRL_PIN(75, "GPPC_H_0"), + PINCTRL_PIN(76, "GPPC_H_1"), + PINCTRL_PIN(77, "GPPC_H_2"), + PINCTRL_PIN(78, "SX_EXIT_HOLDOFFB"), + PINCTRL_PIN(79, "I2C2_SDA"), + PINCTRL_PIN(80, "I2C2_SCL"), + PINCTRL_PIN(81, "I2C3_SDA"), + PINCTRL_PIN(82, "I2C3_SCL"), + PINCTRL_PIN(83, "I2C4_SDA"), + PINCTRL_PIN(84, "I2C4_SCL"), + PINCTRL_PIN(85, "SRCCLKREQB_4"), + PINCTRL_PIN(86, "SRCCLKREQB_5"), + PINCTRL_PIN(87, "M2_SKT2_CFG_0"), + PINCTRL_PIN(88, "M2_SKT2_CFG_1"), + PINCTRL_PIN(89, "M2_SKT2_CFG_2"), + PINCTRL_PIN(90, "M2_SKT2_CFG_3"), + PINCTRL_PIN(91, "DDPB_CTRLCLK"), + 
PINCTRL_PIN(92, "DDPB_CTRLDATA"), + PINCTRL_PIN(93, "CPU_C10_GATEB"), + PINCTRL_PIN(94, "TIME_SYNC_0"), + PINCTRL_PIN(95, "IMGCLKOUT_1"), + PINCTRL_PIN(96, "IMGCLKOUT_2"), + PINCTRL_PIN(97, "IMGCLKOUT_3"), + PINCTRL_PIN(98, "IMGCLKOUT_4"), /* GPP_D */ - PINCTRL_PIN(32, "ISH_GP_0"), - PINCTRL_PIN(33, "ISH_GP_1"), - PINCTRL_PIN(34, "ISH_GP_2"), - PINCTRL_PIN(35, "ISH_GP_3"), - PINCTRL_PIN(36, "IMGCLKOUT_0"), - PINCTRL_PIN(37, "SRCCLKREQB_0"), - PINCTRL_PIN(38, "SRCCLKREQB_1"), - PINCTRL_PIN(39, "SRCCLKREQB_2"), - PINCTRL_PIN(40, "SRCCLKREQB_3"), - PINCTRL_PIN(41, "ISH_SPI_CSB"), - PINCTRL_PIN(42, "ISH_SPI_CLK"), - PINCTRL_PIN(43, "ISH_SPI_MISO"), - PINCTRL_PIN(44, "ISH_SPI_MOSI"), - PINCTRL_PIN(45, "ISH_UART0_RXD"), - PINCTRL_PIN(46, "ISH_UART0_TXD"), - PINCTRL_PIN(47, "ISH_UART0_RTSB"), - PINCTRL_PIN(48, "ISH_UART0_CTSB"), - PINCTRL_PIN(49, "ISH_GP_4"), - PINCTRL_PIN(50, "ISH_GP_5"), - PINCTRL_PIN(51, "I2S_MCLK1_OUT"), - PINCTRL_PIN(52, "GSPI2_CLK_LOOPBK"), + PINCTRL_PIN(99, "ISH_GP_0"), + PINCTRL_PIN(100, "ISH_GP_1"), + PINCTRL_PIN(101, "ISH_GP_2"), + PINCTRL_PIN(102, "ISH_GP_3"), + PINCTRL_PIN(103, "IMGCLKOUT_0"), + PINCTRL_PIN(104, "SRCCLKREQB_0"), + PINCTRL_PIN(105, "SRCCLKREQB_1"), + PINCTRL_PIN(106, "SRCCLKREQB_2"), + PINCTRL_PIN(107, "SRCCLKREQB_3"), + PINCTRL_PIN(108, "ISH_SPI_CSB"), + PINCTRL_PIN(109, "ISH_SPI_CLK"), + PINCTRL_PIN(110, "ISH_SPI_MISO"), + PINCTRL_PIN(111, "ISH_SPI_MOSI"), + PINCTRL_PIN(112, "ISH_UART0_RXD"), + PINCTRL_PIN(113, "ISH_UART0_TXD"), + PINCTRL_PIN(114, "ISH_UART0_RTSB"), + PINCTRL_PIN(115, "ISH_UART0_CTSB"), + PINCTRL_PIN(116, "ISH_GP_4"), + PINCTRL_PIN(117, "ISH_GP_5"), + PINCTRL_PIN(118, "I2S_MCLK1_OUT"), + PINCTRL_PIN(119, "GSPI2_CLK_LOOPBK"), /* GPP_U */ - PINCTRL_PIN(53, "UART3_RXD"), - PINCTRL_PIN(54, "UART3_TXD"), - PINCTRL_PIN(55, "UART3_RTSB"), - PINCTRL_PIN(56, "UART3_CTSB"), - PINCTRL_PIN(57, "GSPI3_CS0B"), - PINCTRL_PIN(58, "GSPI3_CLK"), - PINCTRL_PIN(59, "GSPI3_MISO"), - PINCTRL_PIN(60, "GSPI3_MOSI"), - PINCTRL_PIN(61, "GSPI4_CS0B"), - PINCTRL_PIN(62, "GSPI4_CLK"), - PINCTRL_PIN(63, "GSPI4_MISO"), - PINCTRL_PIN(64, "GSPI4_MOSI"), - PINCTRL_PIN(65, "GSPI5_CS0B"), - PINCTRL_PIN(66, "GSPI5_CLK"), - PINCTRL_PIN(67, "GSPI5_MISO"), - PINCTRL_PIN(68, "GSPI5_MOSI"), - PINCTRL_PIN(69, "GSPI6_CS0B"), - PINCTRL_PIN(70, "GSPI6_CLK"), - PINCTRL_PIN(71, "GSPI6_MISO"), - PINCTRL_PIN(72, "GSPI6_MOSI"), - PINCTRL_PIN(73, "GSPI3_CLK_LOOPBK"), - PINCTRL_PIN(74, "GSPI4_CLK_LOOPBK"), - PINCTRL_PIN(75, "GSPI5_CLK_LOOPBK"), - PINCTRL_PIN(76, "GSPI6_CLK_LOOPBK"), + PINCTRL_PIN(120, "UART3_RXD"), + PINCTRL_PIN(121, "UART3_TXD"), + PINCTRL_PIN(122, "UART3_RTSB"), + PINCTRL_PIN(123, "UART3_CTSB"), + PINCTRL_PIN(124, "GSPI3_CS0B"), + PINCTRL_PIN(125, "GSPI3_CLK"), + PINCTRL_PIN(126, "GSPI3_MISO"), + PINCTRL_PIN(127, "GSPI3_MOSI"), + PINCTRL_PIN(128, "GSPI4_CS0B"), + PINCTRL_PIN(129, "GSPI4_CLK"), + PINCTRL_PIN(130, "GSPI4_MISO"), + PINCTRL_PIN(131, "GSPI4_MOSI"), + PINCTRL_PIN(132, "GSPI5_CS0B"), + PINCTRL_PIN(133, "GSPI5_CLK"), + PINCTRL_PIN(134, "GSPI5_MISO"), + PINCTRL_PIN(135, "GSPI5_MOSI"), + PINCTRL_PIN(136, "GSPI6_CS0B"), + PINCTRL_PIN(137, "GSPI6_CLK"), + PINCTRL_PIN(138, "GSPI6_MISO"), + PINCTRL_PIN(139, "GSPI6_MOSI"), + PINCTRL_PIN(140, "GSPI3_CLK_LOOPBK"), + PINCTRL_PIN(141, "GSPI4_CLK_LOOPBK"), + PINCTRL_PIN(142, "GSPI5_CLK_LOOPBK"), + PINCTRL_PIN(143, "GSPI6_CLK_LOOPBK"), /* vGPIO */ - PINCTRL_PIN(77, "CNV_BTEN"), - PINCTRL_PIN(78, "CNV_BT_HOST_WAKEB"), - PINCTRL_PIN(79, "CNV_BT_IF_SELECT"), - PINCTRL_PIN(80, "vCNV_BT_UART_TXD"), - PINCTRL_PIN(81, 
"vCNV_BT_UART_RXD"), - PINCTRL_PIN(82, "vCNV_BT_UART_CTS_B"), - PINCTRL_PIN(83, "vCNV_BT_UART_RTS_B"), - PINCTRL_PIN(84, "vCNV_MFUART1_TXD"), - PINCTRL_PIN(85, "vCNV_MFUART1_RXD"), - PINCTRL_PIN(86, "vCNV_MFUART1_CTS_B"), - PINCTRL_PIN(87, "vCNV_MFUART1_RTS_B"), - PINCTRL_PIN(88, "vUART0_TXD"), - PINCTRL_PIN(89, "vUART0_RXD"), - PINCTRL_PIN(90, "vUART0_CTS_B"), - PINCTRL_PIN(91, "vUART0_RTS_B"), - PINCTRL_PIN(92, "vISH_UART0_TXD"), - PINCTRL_PIN(93, "vISH_UART0_RXD"), - PINCTRL_PIN(94, "vISH_UART0_CTS_B"), - PINCTRL_PIN(95, "vISH_UART0_RTS_B"), - PINCTRL_PIN(96, "vCNV_BT_I2S_BCLK"), - PINCTRL_PIN(97, "vCNV_BT_I2S_WS_SYNC"), - PINCTRL_PIN(98, "vCNV_BT_I2S_SDO"), - PINCTRL_PIN(99, "vCNV_BT_I2S_SDI"), - PINCTRL_PIN(100, "vI2S2_SCLK"), - PINCTRL_PIN(101, "vI2S2_SFRM"), - PINCTRL_PIN(102, "vI2S2_TXD"), - PINCTRL_PIN(103, "vI2S2_RXD"), -}; - -static const struct intel_padgroup tgllp_community1_gpps[] = { - TGL_GPP(0, 0, 7), /* GPP_S */ - TGL_GPP(1, 8, 31), /* GPP_H */ - TGL_GPP(2, 32, 52), /* GPP_D */ - TGL_GPP(3, 53, 76), /* GPP_U */ - TGL_GPP(4, 77, 103), /* vGPIO */ -}; - -static const struct intel_community tgllp_community1[] = { - TGL_COMMUNITY(0, 103, tgllp_community1_gpps), -}; - -static const struct intel_pinctrl_soc_data tgllp_community1_soc_data = { - .uid = "1", - .pins = tgllp_community1_pins, - .npins = ARRAY_SIZE(tgllp_community1_pins), - .communities = tgllp_community1, - .ncommunities = ARRAY_SIZE(tgllp_community1), -}; - -static const struct pinctrl_pin_desc tgllp_community4_pins[] = { + PINCTRL_PIN(144, "CNV_BTEN"), + PINCTRL_PIN(145, "CNV_BT_HOST_WAKEB"), + PINCTRL_PIN(146, "CNV_BT_IF_SELECT"), + PINCTRL_PIN(147, "vCNV_BT_UART_TXD"), + PINCTRL_PIN(148, "vCNV_BT_UART_RXD"), + PINCTRL_PIN(149, "vCNV_BT_UART_CTS_B"), + PINCTRL_PIN(150, "vCNV_BT_UART_RTS_B"), + PINCTRL_PIN(151, "vCNV_MFUART1_TXD"), + PINCTRL_PIN(152, "vCNV_MFUART1_RXD"), + PINCTRL_PIN(153, "vCNV_MFUART1_CTS_B"), + PINCTRL_PIN(154, "vCNV_MFUART1_RTS_B"), + PINCTRL_PIN(155, "vUART0_TXD"), + PINCTRL_PIN(156, "vUART0_RXD"), + PINCTRL_PIN(157, "vUART0_CTS_B"), + PINCTRL_PIN(158, "vUART0_RTS_B"), + PINCTRL_PIN(159, "vISH_UART0_TXD"), + PINCTRL_PIN(160, "vISH_UART0_RXD"), + PINCTRL_PIN(161, "vISH_UART0_CTS_B"), + PINCTRL_PIN(162, "vISH_UART0_RTS_B"), + PINCTRL_PIN(163, "vCNV_BT_I2S_BCLK"), + PINCTRL_PIN(164, "vCNV_BT_I2S_WS_SYNC"), + PINCTRL_PIN(165, "vCNV_BT_I2S_SDO"), + PINCTRL_PIN(166, "vCNV_BT_I2S_SDI"), + PINCTRL_PIN(167, "vI2S2_SCLK"), + PINCTRL_PIN(168, "vI2S2_SFRM"), + PINCTRL_PIN(169, "vI2S2_TXD"), + PINCTRL_PIN(170, "vI2S2_RXD"), /* GPP_C */ - PINCTRL_PIN(0, "SMBCLK"), - PINCTRL_PIN(1, "SMBDATA"), - PINCTRL_PIN(2, "SMBALERTB"), - PINCTRL_PIN(3, "SML0CLK"), - PINCTRL_PIN(4, "SML0DATA"), - PINCTRL_PIN(5, "SML0ALERTB"), - PINCTRL_PIN(6, "SML1CLK"), - PINCTRL_PIN(7, "SML1DATA"), - PINCTRL_PIN(8, "UART0_RXD"), - PINCTRL_PIN(9, "UART0_TXD"), - PINCTRL_PIN(10, "UART0_RTSB"), - PINCTRL_PIN(11, "UART0_CTSB"), - PINCTRL_PIN(12, "UART1_RXD"), - PINCTRL_PIN(13, "UART1_TXD"), - PINCTRL_PIN(14, "UART1_RTSB"), - PINCTRL_PIN(15, "UART1_CTSB"), - PINCTRL_PIN(16, "I2C0_SDA"), - PINCTRL_PIN(17, "I2C0_SCL"), - PINCTRL_PIN(18, "I2C1_SDA"), - PINCTRL_PIN(19, "I2C1_SCL"), - PINCTRL_PIN(20, "UART2_RXD"), - PINCTRL_PIN(21, "UART2_TXD"), - PINCTRL_PIN(22, "UART2_RTSB"), - PINCTRL_PIN(23, "UART2_CTSB"), + PINCTRL_PIN(171, "SMBCLK"), + PINCTRL_PIN(172, "SMBDATA"), + PINCTRL_PIN(173, "SMBALERTB"), + PINCTRL_PIN(174, "SML0CLK"), + PINCTRL_PIN(175, "SML0DATA"), + PINCTRL_PIN(176, "SML0ALERTB"), + PINCTRL_PIN(177, "SML1CLK"), + 
PINCTRL_PIN(178, "SML1DATA"), + PINCTRL_PIN(179, "UART0_RXD"), + PINCTRL_PIN(180, "UART0_TXD"), + PINCTRL_PIN(181, "UART0_RTSB"), + PINCTRL_PIN(182, "UART0_CTSB"), + PINCTRL_PIN(183, "UART1_RXD"), + PINCTRL_PIN(184, "UART1_TXD"), + PINCTRL_PIN(185, "UART1_RTSB"), + PINCTRL_PIN(186, "UART1_CTSB"), + PINCTRL_PIN(187, "I2C0_SDA"), + PINCTRL_PIN(188, "I2C0_SCL"), + PINCTRL_PIN(189, "I2C1_SDA"), + PINCTRL_PIN(190, "I2C1_SCL"), + PINCTRL_PIN(191, "UART2_RXD"), + PINCTRL_PIN(192, "UART2_TXD"), + PINCTRL_PIN(193, "UART2_RTSB"), + PINCTRL_PIN(194, "UART2_CTSB"), /* GPP_F */ - PINCTRL_PIN(24, "CNV_BRI_DT"), - PINCTRL_PIN(25, "CNV_BRI_RSP"), - PINCTRL_PIN(26, "CNV_RGI_DT"), - PINCTRL_PIN(27, "CNV_RGI_RSP"), - PINCTRL_PIN(28, "CNV_RF_RESET_B"), - PINCTRL_PIN(29, "GPPC_F_5"), - PINCTRL_PIN(30, "CNV_PA_BLANKING"), - PINCTRL_PIN(31, "GPPC_F_7"), - PINCTRL_PIN(32, "I2S_MCLK2_INOUT"), - PINCTRL_PIN(33, "BOOTMPC"), - PINCTRL_PIN(34, "GPPC_F_10"), - PINCTRL_PIN(35, "GPPC_F_11"), - PINCTRL_PIN(36, "GSXDOUT"), - PINCTRL_PIN(37, "GSXSLOAD"), - PINCTRL_PIN(38, "GSXDIN"), - PINCTRL_PIN(39, "GSXSRESETB"), - PINCTRL_PIN(40, "GSXCLK"), - PINCTRL_PIN(41, "GMII_MDC"), - PINCTRL_PIN(42, "GMII_MDIO"), - PINCTRL_PIN(43, "SRCCLKREQB_6"), - PINCTRL_PIN(44, "EXT_PWR_GATEB"), - PINCTRL_PIN(45, "EXT_PWR_GATE2B"), - PINCTRL_PIN(46, "VNN_CTRL"), - PINCTRL_PIN(47, "V1P05_CTRL"), - PINCTRL_PIN(48, "GPPF_CLK_LOOPBACK"), + PINCTRL_PIN(195, "CNV_BRI_DT"), + PINCTRL_PIN(196, "CNV_BRI_RSP"), + PINCTRL_PIN(197, "CNV_RGI_DT"), + PINCTRL_PIN(198, "CNV_RGI_RSP"), + PINCTRL_PIN(199, "CNV_RF_RESET_B"), + PINCTRL_PIN(200, "GPPC_F_5"), + PINCTRL_PIN(201, "CNV_PA_BLANKING"), + PINCTRL_PIN(202, "GPPC_F_7"), + PINCTRL_PIN(203, "I2S_MCLK2_INOUT"), + PINCTRL_PIN(204, "BOOTMPC"), + PINCTRL_PIN(205, "GPPC_F_10"), + PINCTRL_PIN(206, "GPPC_F_11"), + PINCTRL_PIN(207, "GSXDOUT"), + PINCTRL_PIN(208, "GSXSLOAD"), + PINCTRL_PIN(209, "GSXDIN"), + PINCTRL_PIN(210, "GSXSRESETB"), + PINCTRL_PIN(211, "GSXCLK"), + PINCTRL_PIN(212, "GMII_MDC"), + PINCTRL_PIN(213, "GMII_MDIO"), + PINCTRL_PIN(214, "SRCCLKREQB_6"), + PINCTRL_PIN(215, "EXT_PWR_GATEB"), + PINCTRL_PIN(216, "EXT_PWR_GATE2B"), + PINCTRL_PIN(217, "VNN_CTRL"), + PINCTRL_PIN(218, "V1P05_CTRL"), + PINCTRL_PIN(219, "GPPF_CLK_LOOPBACK"), /* HVCMOS */ - PINCTRL_PIN(49, "L_BKLTEN"), - PINCTRL_PIN(50, "L_BKLTCTL"), - PINCTRL_PIN(51, "L_VDDEN"), - PINCTRL_PIN(52, "SYS_PWROK"), - PINCTRL_PIN(53, "SYS_RESETB"), - PINCTRL_PIN(54, "MLK_RSTB"), + PINCTRL_PIN(220, "L_BKLTEN"), + PINCTRL_PIN(221, "L_BKLTCTL"), + PINCTRL_PIN(222, "L_VDDEN"), + PINCTRL_PIN(223, "SYS_PWROK"), + PINCTRL_PIN(224, "SYS_RESETB"), + PINCTRL_PIN(225, "MLK_RSTB"), /* GPP_E */ - PINCTRL_PIN(55, "SATAXPCIE_0"), - PINCTRL_PIN(56, "SPI1_IO_2"), - PINCTRL_PIN(57, "SPI1_IO_3"), - PINCTRL_PIN(58, "CPU_GP_0"), - PINCTRL_PIN(59, "SATA_DEVSLP_0"), - PINCTRL_PIN(60, "SATA_DEVSLP_1"), - PINCTRL_PIN(61, "GPPC_E_6"), - PINCTRL_PIN(62, "CPU_GP_1"), - PINCTRL_PIN(63, "SPI1_CS1B"), - PINCTRL_PIN(64, "USB2_OCB_0"), - PINCTRL_PIN(65, "SPI1_CSB"), - PINCTRL_PIN(66, "SPI1_CLK"), - PINCTRL_PIN(67, "SPI1_MISO_IO_1"), - PINCTRL_PIN(68, "SPI1_MOSI_IO_0"), - PINCTRL_PIN(69, "DDSP_HPD_A"), - PINCTRL_PIN(70, "ISH_GP_6"), - PINCTRL_PIN(71, "ISH_GP_7"), - PINCTRL_PIN(72, "GPPC_E_17"), - PINCTRL_PIN(73, "DDP1_CTRLCLK"), - PINCTRL_PIN(74, "DDP1_CTRLDATA"), - PINCTRL_PIN(75, "DDP2_CTRLCLK"), - PINCTRL_PIN(76, "DDP2_CTRLDATA"), - PINCTRL_PIN(77, "DDPA_CTRLCLK"), - PINCTRL_PIN(78, "DDPA_CTRLDATA"), - PINCTRL_PIN(79, "SPI1_CLK_LOOPBK"), + PINCTRL_PIN(226, "SATAXPCIE_0"), + 
PINCTRL_PIN(227, "SPI1_IO_2"), + PINCTRL_PIN(228, "SPI1_IO_3"), + PINCTRL_PIN(229, "CPU_GP_0"), + PINCTRL_PIN(230, "SATA_DEVSLP_0"), + PINCTRL_PIN(231, "SATA_DEVSLP_1"), + PINCTRL_PIN(232, "GPPC_E_6"), + PINCTRL_PIN(233, "CPU_GP_1"), + PINCTRL_PIN(234, "SPI1_CS1B"), + PINCTRL_PIN(235, "USB2_OCB_0"), + PINCTRL_PIN(236, "SPI1_CSB"), + PINCTRL_PIN(237, "SPI1_CLK"), + PINCTRL_PIN(238, "SPI1_MISO_IO_1"), + PINCTRL_PIN(239, "SPI1_MOSI_IO_0"), + PINCTRL_PIN(240, "DDSP_HPD_A"), + PINCTRL_PIN(241, "ISH_GP_6"), + PINCTRL_PIN(242, "ISH_GP_7"), + PINCTRL_PIN(243, "GPPC_E_17"), + PINCTRL_PIN(244, "DDP1_CTRLCLK"), + PINCTRL_PIN(245, "DDP1_CTRLDATA"), + PINCTRL_PIN(246, "DDP2_CTRLCLK"), + PINCTRL_PIN(247, "DDP2_CTRLDATA"), + PINCTRL_PIN(248, "DDPA_CTRLCLK"), + PINCTRL_PIN(249, "DDPA_CTRLDATA"), + PINCTRL_PIN(250, "SPI1_CLK_LOOPBK"), /* JTAG */ - PINCTRL_PIN(80, "JTAG_TDO"), - PINCTRL_PIN(81, "JTAGX"), - PINCTRL_PIN(82, "PRDYB"), - PINCTRL_PIN(83, "PREQB"), - PINCTRL_PIN(84, "CPU_TRSTB"), - PINCTRL_PIN(85, "JTAG_TDI"), - PINCTRL_PIN(86, "JTAG_TMS"), - PINCTRL_PIN(87, "JTAG_TCK"), - PINCTRL_PIN(88, "DBG_PMODE"), -}; - -static const struct intel_padgroup tgllp_community4_gpps[] = { - TGL_GPP(0, 0, 23), /* GPP_C */ - TGL_GPP(1, 24, 48), /* GPP_F */ - TGL_GPP(2, 49, 54), /* HVCMOS */ - TGL_GPP(3, 55, 79), /* GPP_E */ - TGL_GPP(4, 80, 88), /* JTAG */ + PINCTRL_PIN(251, "JTAG_TDO"), + PINCTRL_PIN(252, "JTAGX"), + PINCTRL_PIN(253, "PRDYB"), + PINCTRL_PIN(254, "PREQB"), + PINCTRL_PIN(255, "CPU_TRSTB"), + PINCTRL_PIN(256, "JTAG_TDI"), + PINCTRL_PIN(257, "JTAG_TMS"), + PINCTRL_PIN(258, "JTAG_TCK"), + PINCTRL_PIN(259, "DBG_PMODE"), + /* GPP_R */ + PINCTRL_PIN(260, "HDA_BCLK"), + PINCTRL_PIN(261, "HDA_SYNC"), + PINCTRL_PIN(262, "HDA_SDO"), + PINCTRL_PIN(263, "HDA_SDI_0"), + PINCTRL_PIN(264, "HDA_RSTB"), + PINCTRL_PIN(265, "HDA_SDI_1"), + PINCTRL_PIN(266, "GPP_R_6"), + PINCTRL_PIN(267, "GPP_R_7"), + /* SPI */ + PINCTRL_PIN(268, "SPI0_IO_2"), + PINCTRL_PIN(269, "SPI0_IO_3"), + PINCTRL_PIN(270, "SPI0_MOSI_IO_0"), + PINCTRL_PIN(271, "SPI0_MISO_IO_1"), + PINCTRL_PIN(272, "SPI0_TPM_CSB"), + PINCTRL_PIN(273, "SPI0_FLASH_0_CSB"), + PINCTRL_PIN(274, "SPI0_FLASH_1_CSB"), + PINCTRL_PIN(275, "SPI0_CLK"), + PINCTRL_PIN(276, "SPI0_CLK_LOOPBK"), }; -static const struct intel_community tgllp_community4[] = { - TGL_COMMUNITY(0, 88, tgllp_community4_gpps), +static const struct intel_padgroup tgllp_community0_gpps[] = { + TGL_GPP(0, 0, 25, 0), /* GPP_B */ + TGL_GPP(1, 26, 41, 32), /* GPP_T */ + TGL_GPP(2, 42, 66, 64), /* GPP_A */ }; -static const struct intel_pinctrl_soc_data tgllp_community4_soc_data = { - .uid = "4", - .pins = tgllp_community4_pins, - .npins = ARRAY_SIZE(tgllp_community4_pins), - .communities = tgllp_community4, - .ncommunities = ARRAY_SIZE(tgllp_community4), +static const struct intel_padgroup tgllp_community1_gpps[] = { + TGL_GPP(0, 67, 74, 96), /* GPP_S */ + TGL_GPP(1, 75, 98, 128), /* GPP_H */ + TGL_GPP(2, 99, 119, 160), /* GPP_D */ + TGL_GPP(3, 120, 143, 192), /* GPP_U */ + TGL_GPP(4, 144, 170, 224), /* vGPIO */ }; -static const struct pinctrl_pin_desc tgllp_community5_pins[] = { - /* GPP_R */ - PINCTRL_PIN(0, "HDA_BCLK"), - PINCTRL_PIN(1, "HDA_SYNC"), - PINCTRL_PIN(2, "HDA_SDO"), - PINCTRL_PIN(3, "HDA_SDI_0"), - PINCTRL_PIN(4, "HDA_RSTB"), - PINCTRL_PIN(5, "HDA_SDI_1"), - PINCTRL_PIN(6, "GPP_R_6"), - PINCTRL_PIN(7, "GPP_R_7"), - /* SPI */ - PINCTRL_PIN(8, "SPI0_IO_2"), - PINCTRL_PIN(9, "SPI0_IO_3"), - PINCTRL_PIN(10, "SPI0_MOSI_IO_0"), - PINCTRL_PIN(11, "SPI0_MISO_IO_1"), - PINCTRL_PIN(12, "SPI0_TPM_CSB"), - 
PINCTRL_PIN(13, "SPI0_FLASH_0_CSB"), - PINCTRL_PIN(14, "SPI0_FLASH_1_CSB"), - PINCTRL_PIN(15, "SPI0_CLK"), - PINCTRL_PIN(16, "SPI0_CLK_LOOPBK"), +static const struct intel_padgroup tgllp_community4_gpps[] = { + TGL_GPP(0, 171, 194, 256), /* GPP_C */ + TGL_GPP(1, 195, 219, 288), /* GPP_F */ + TGL_GPP(2, 220, 225, TGL_NO_GPIO), /* HVCMOS */ + TGL_GPP(3, 226, 250, 320), /* GPP_E */ + TGL_GPP(4, 251, 259, TGL_NO_GPIO), /* JTAG */ }; static const struct intel_padgroup tgllp_community5_gpps[] = { - TGL_GPP(0, 0, 7), /* GPP_R */ - TGL_GPP(1, 8, 16), /* SPI */ -}; - -static const struct intel_community tgllp_community5[] = { - TGL_COMMUNITY(0, 16, tgllp_community5_gpps), + TGL_GPP(0, 260, 267, 352), /* GPP_R */ + TGL_GPP(1, 268, 276, TGL_NO_GPIO), /* SPI */ }; -static const struct intel_pinctrl_soc_data tgllp_community5_soc_data = { - .uid = "5", - .pins = tgllp_community5_pins, - .npins = ARRAY_SIZE(tgllp_community5_pins), - .communities = tgllp_community5, - .ncommunities = ARRAY_SIZE(tgllp_community5), +static const struct intel_community tgllp_communities[] = { + TGL_COMMUNITY(0, 0, 66, tgllp_community0_gpps), + TGL_COMMUNITY(1, 67, 170, tgllp_community1_gpps), + TGL_COMMUNITY(2, 171, 259, tgllp_community4_gpps), + TGL_COMMUNITY(3, 260, 276, tgllp_community5_gpps), }; -static const struct intel_pinctrl_soc_data *tgllp_soc_data_array[] = { - &tgllp_community0_soc_data, - &tgllp_community1_soc_data, - &tgllp_community4_soc_data, - &tgllp_community5_soc_data, - NULL +static const struct intel_pinctrl_soc_data tgllp_soc_data = { + .pins = tgllp_pins, + .npins = ARRAY_SIZE(tgllp_pins), + .communities = tgllp_communities, + .ncommunities = ARRAY_SIZE(tgllp_communities), }; static const struct acpi_device_id tgl_pinctrl_acpi_match[] = { - { "INT34C5", (kernel_ulong_t)tgllp_soc_data_array }, + { "INT34C5", (kernel_ulong_t)&tgllp_soc_data }, { } }; MODULE_DEVICE_TABLE(acpi, tgl_pinctrl_acpi_match); @@ -438,7 +391,7 @@ MODULE_DEVICE_TABLE(acpi, tgl_pinctrl_acpi_match); static INTEL_PINCTRL_PM_OPS(tgl_pinctrl_pm_ops); static struct platform_driver tgl_pinctrl_driver = { - .probe = intel_pinctrl_probe_by_uid, + .probe = intel_pinctrl_probe_by_hid, .driver = { .name = "tigerlake-pinctrl", .acpi_match_table = tgl_pinctrl_acpi_match, diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 1b6e8646700f..2ac921c83da9 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -147,8 +147,8 @@ static const unsigned int sdio_d0_pins[] = { GPIOX_0 }; static const unsigned int sdio_d1_pins[] = { GPIOX_1 }; static const unsigned int sdio_d2_pins[] = { GPIOX_2 }; static const unsigned int sdio_d3_pins[] = { GPIOX_3 }; -static const unsigned int sdio_cmd_pins[] = { GPIOX_4 }; -static const unsigned int sdio_clk_pins[] = { GPIOX_5 }; +static const unsigned int sdio_clk_pins[] = { GPIOX_4 }; +static const unsigned int sdio_cmd_pins[] = { GPIOX_5 }; static const unsigned int sdio_irq_pins[] = { GPIOX_7 }; static const unsigned int nand_ce0_pins[] = { BOOT_8 }; diff --git a/drivers/pinctrl/pinctrl-falcon.c b/drivers/pinctrl/pinctrl-falcon.c index a454f57c264e..62c02b969327 100644 --- a/drivers/pinctrl/pinctrl-falcon.c +++ b/drivers/pinctrl/pinctrl-falcon.c @@ -451,7 +451,7 @@ static int pinctrl_falcon_probe(struct platform_device *pdev) falcon_info.clk[*bank] = clk_get(&ppdev->dev, NULL); if (IS_ERR(falcon_info.clk[*bank])) { dev_err(&ppdev->dev, "failed to get clock\n"); - of_node_put(np) + of_node_put(np); return 
PTR_ERR(falcon_info.clk[*bank]); } falcon_info.membase[*bank] = devm_ioremap_resource(&pdev->dev, diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 5d6f9f61ce02..1a948c3f54b7 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -960,7 +960,6 @@ static int msm_gpio_irq_set_wake(struct irq_data *d, unsigned int on) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct msm_pinctrl *pctrl = gpiochip_get_data(gc); - unsigned long flags; /* * While they may not wake up when the TLMM is powered off, @@ -971,12 +970,8 @@ static int msm_gpio_irq_set_wake(struct irq_data *d, unsigned int on) if (d->parent_data) irq_chip_set_wake_parent(d, on); - raw_spin_lock_irqsave(&pctrl->lock, flags); - irq_set_irq_wake(pctrl->irq, on); - raw_spin_unlock_irqrestore(&pctrl->lock, flags); - return 0; } @@ -1109,7 +1104,6 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) pctrl->irq_chip.irq_mask = msm_gpio_irq_mask; pctrl->irq_chip.irq_unmask = msm_gpio_irq_unmask; pctrl->irq_chip.irq_ack = msm_gpio_irq_ack; - pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent; pctrl->irq_chip.irq_set_type = msm_gpio_irq_set_type; pctrl->irq_chip.irq_set_wake = msm_gpio_irq_set_wake; pctrl->irq_chip.irq_request_resources = msm_gpio_irq_reqres; @@ -1123,7 +1117,7 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl) if (!chip->irq.parent_domain) return -EPROBE_DEFER; chip->irq.child_to_parent_hwirq = msm_gpio_wakeirq; - + pctrl->irq_chip.irq_eoi = irq_chip_eoi_parent; /* * Let's skip handling the GPIOs, if the parent irqchip * is handling the direct connect IRQ of the GPIO. diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index dca86886b1f9..6b7f0d56a532 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -794,7 +794,7 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) girq->fwnode = of_node_to_fwnode(pctrl->dev->of_node); girq->parent_domain = parent_domain; girq->child_to_parent_hwirq = pm8xxx_child_to_parent_hwirq; - girq->populate_parent_fwspec = gpiochip_populate_parent_fwspec_fourcell; + girq->populate_parent_fwspec = gpiochip_populate_parent_fwspec_twocell; girq->child_offset_to_irq = pm8xxx_child_offset_to_irq; girq->child_irq_domain_ops.translate = pm8xxx_domain_translate; diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c index 24866a5958ae..a9875038ed9b 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c @@ -2305,7 +2305,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_ATAG0_A, 0, FN_REMOCON_B, 0, /* IP0_11_8 [4] */ FN_SD1_DAT2_A, FN_MMC_D2, 0, FN_BS, - FN_ATADIR0_A, 0, FN_SDSELF_B, 0, + FN_ATADIR0_A, 0, FN_SDSELF_A, 0, FN_PWM4_B, 0, 0, 0, 0, 0, 0, 0, /* IP0_7_5 [3] */ @@ -2349,7 +2349,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_TS_SDAT0_A, 0, 0, 0, 0, 0, 0, 0, /* IP1_10_8 [3] */ - FN_SD1_CLK_B, FN_MMC_D6, 0, FN_A24, + FN_SD1_CD_A, FN_MMC_D6, 0, FN_A24, FN_DREQ1_A, 0, FN_HRX0_B, FN_TS_SPSYNC0_A, /* IP1_7_5 [3] */ FN_A23, FN_HTX0_B, FN_TX2_B, FN_DACK2_A, diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a77965.c b/drivers/pinctrl/sh-pfc/pfc-r8a77965.c index 8bdf33c807f6..6616f5210b9d 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a77965.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a77965.c @@ -5998,7 +5998,7 @@ static const struct pinmux_drive_reg pinmux_drive_regs[] = { { PIN_DU_DOTCLKIN1, 0, 2 }, /* DU_DOTCLKIN1 */ } }, { 
PINMUX_DRIVE_REG("DRVCTRL12", 0xe6060330) { - { PIN_DU_DOTCLKIN3, 28, 2 }, /* DU_DOTCLKIN3 */ + { PIN_DU_DOTCLKIN3, 24, 2 }, /* DU_DOTCLKIN3 */ { PIN_FSCLKST, 20, 2 }, /* FSCLKST */ { PIN_TMS, 4, 2 }, /* TMS */ } }, @@ -6254,8 +6254,8 @@ static const struct pinmux_bias_reg pinmux_bias_regs[] = { [31] = PIN_DU_DOTCLKIN1, /* DU_DOTCLKIN1 */ } }, { PINMUX_BIAS_REG("PUEN3", 0xe606040c, "PUD3", 0xe606044c) { - [ 0] = PIN_DU_DOTCLKIN3, /* DU_DOTCLKIN3 */ - [ 1] = SH_PFC_PIN_NONE, + [ 0] = SH_PFC_PIN_NONE, + [ 1] = PIN_DU_DOTCLKIN3, /* DU_DOTCLKIN3 */ [ 2] = PIN_FSCLKST, /* FSCLKST */ [ 3] = PIN_EXTALR, /* EXTALR*/ [ 4] = PIN_TRST_N, /* TRST# */ diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c index 4a95867deb8a..5a026601d4f9 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c @@ -497,17 +497,15 @@ enum { SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK, CRX0_MARK, CRX1_MARK, CTX0_MARK, CTX1_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK, PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK, PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK, PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK, IERXD_MARK, IETXD_MARK, - CRX0_CRX1_MARK, WDTOVF_MARK, - CRX0X1_MARK, - /* DMAC */ TEND0_MARK, DACK0_MARK, DREQ0_MARK, TEND1_MARK, DACK1_MARK, DREQ1_MARK, @@ -995,12 +993,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(PJ3_DATA, PJ3MD_00), PINMUX_DATA(CRX1_MARK, PJ3MD_01), - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10), + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10), PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11), PINMUX_DATA(PJ2_DATA, PJ2MD_000), PINMUX_DATA(CTX1_MARK, PJ2MD_001), - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010), + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010), PINMUX_DATA(CS2_MARK, PJ2MD_011), PINMUX_DATA(SCK0_MARK, PJ2MD_100), PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101), @@ -1245,6 +1243,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0), GPIO_FN(CRX0_CRX1), diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c index 6cbb18ef77dc..d20974a55d93 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c @@ -737,13 +737,12 @@ enum { CRX0_MARK, CTX0_MARK, CRX1_MARK, CTX1_MARK, CRX2_MARK, CTX2_MARK, - CRX0_CRX1_MARK, - CRX0_CRX1_CRX2_MARK, - CTX0CTX1CTX2_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK, CRX1_PJ22_MARK, CTX1_PJ23_MARK, CRX2_PJ20_MARK, CTX2_PJ21_MARK, - CRX0CRX1_PJ22_MARK, - CRX0CRX1CRX2_PJ20_MARK, + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK, + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK, /* VDC */ DV_CLK_MARK, @@ -821,6 +820,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CS3_MARK, PC8MD_001), PINMUX_DATA(TXD7_MARK, PC8MD_010), PINMUX_DATA(CTX1_MARK, PC8MD_011), + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100), PINMUX_DATA(PC7_DATA, PC7MD_000), PINMUX_DATA(CKE_MARK, PC7MD_001), @@ -833,11 +833,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CAS_MARK, PC6MD_001), PINMUX_DATA(SCK7_MARK, PC6MD_010), PINMUX_DATA(CTX0_MARK, PC6MD_011), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100), PINMUX_DATA(PC5_DATA, PC5MD_000), PINMUX_DATA(RAS_MARK, PC5MD_001), PINMUX_DATA(CRX0_MARK, PC5MD_011), - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100), PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101), PINMUX_DATA(PC4_DATA, PC4MD_00), @@ -1289,30 +1290,32 @@ static const u16 pinmux_data[] = { PINMUX_DATA(LCD_DATA23_PJ23_MARK, 
PJ23MD_010), PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011), PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100), - PINMUX_DATA(CTX1_MARK, PJ23MD_101), + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101), + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110), PINMUX_DATA(PJ22_DATA, PJ22MD_000), PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001), PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010), PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011), PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100), - PINMUX_DATA(CRX1_MARK, PJ22MD_101), - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110), + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101), + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110), PINMUX_DATA(PJ21_DATA, PJ21MD_000), PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001), PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010), PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011), PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100), - PINMUX_DATA(CTX2_MARK, PJ21MD_101), + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101), + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110), PINMUX_DATA(PJ20_DATA, PJ20MD_000), PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001), PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010), PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011), PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100), - PINMUX_DATA(CRX2_MARK, PJ20MD_101), - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110), + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101), + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110), PINMUX_DATA(PJ19_DATA, PJ19MD_000), PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001), @@ -1663,12 +1666,24 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(WDTOVF), /* CAN */ + GPIO_FN(CTX2), + GPIO_FN(CRX2), GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), GPIO_FN(CRX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0_CRX1), + GPIO_FN(CTX0_CTX1_CTX2), GPIO_FN(CRX0_CRX1_CRX2), + GPIO_FN(CTX2_PJ21), + GPIO_FN(CRX2_PJ20), + GPIO_FN(CTX1_PJ23), + GPIO_FN(CRX1_PJ22), + GPIO_FN(CTX0_CTX1_PJ23), + GPIO_FN(CRX0_CRX1_PJ22), + GPIO_FN(CTX0_CTX1_CTX2_PJ21), + GPIO_FN(CRX0_CRX1_CRX2_PJ20), /* DMAC */ GPIO_FN(TEND0), diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 6669db2555fb..9ff5c5da1a2f 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -173,6 +173,11 @@ static const struct key_entry dell_wmi_keymap_type_0000[] = { /* Dell Support Center key */ { KE_IGNORE, 0xe06e, { KEY_RESERVED } }, + /* Dell Vostro 3360 multimedia keys with mangled DSDT */ + { KE_KEY, 0xe0f1, { KEY_PROG1 } }, + { KE_KEY, 0xe0f2, { KEY_PROG2 } }, + { KE_KEY, 0xe0f3, { KEY_PROG3 } }, + { KE_IGNORE, 0xe0f7, { KEY_MUTE } }, { KE_IGNORE, 0xe0f8, { KEY_VOLUMEDOWN } }, { KE_IGNORE, 0xe0f9, { KEY_VOLUMEUP } }, diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 292bace83f1e..6f436836fe50 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -146,9 +146,10 @@ static int mid_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - ddata = (struct mid_pb_ddata *)id->driver_data; + ddata = devm_kmemdup(&pdev->dev, (void *)id->driver_data, + sizeof(*ddata), GFP_KERNEL); if (!ddata) - return -ENODATA; + return -ENOMEM; ddata->dev = &pdev->dev; ddata->irq = irq; diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index cdab916fbf92..e330ec73c465 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -67,26 +67,22 @@ struct intel_scu_ipc_pdata_t { u32 i2c_base; u32 i2c_len; - u8 irq_mode; }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { 
.i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 0, }; /* Penwell and Cloverview */ static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 1, }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { .i2c_base = 0xff00d000, .i2c_len = 0x10, - .irq_mode = 0, }; struct intel_scu_ipc_dev { @@ -99,6 +95,9 @@ struct intel_scu_ipc_dev { static struct intel_scu_ipc_dev ipcdev; /* Only one for now */ +#define IPC_STATUS 0x04 +#define IPC_STATUS_IRQ BIT(2) + /* * IPC Read Buffer (Read Only): * 16 byte buffer for receiving data from SCU, if IPC command @@ -120,11 +119,8 @@ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */ */ static inline void ipc_command(struct intel_scu_ipc_dev *scu, u32 cmd) { - if (scu->irq_mode) { - reinit_completion(&scu->cmd_complete); - writel(cmd | IPC_IOC, scu->ipc_base); - } - writel(cmd, scu->ipc_base); + reinit_completion(&scu->cmd_complete); + writel(cmd | IPC_IOC, scu->ipc_base); } /* @@ -610,9 +606,10 @@ EXPORT_SYMBOL(intel_scu_ipc_i2c_cntrl); static irqreturn_t ioc(int irq, void *dev_id) { struct intel_scu_ipc_dev *scu = dev_id; + int status = ipc_read_status(scu); - if (scu->irq_mode) - complete(&scu->cmd_complete); + writel(status | IPC_STATUS_IRQ, scu->ipc_base + IPC_STATUS); + complete(&scu->cmd_complete); return IRQ_HANDLED; } @@ -638,8 +635,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!pdata) return -ENODEV; - scu->irq_mode = pdata->irq_mode; - err = pcim_enable_device(pdev); if (err) return err; diff --git a/drivers/power/supply/axp20x_ac_power.c b/drivers/power/supply/axp20x_ac_power.c index 0d34a932b6d5..f74b0556bb6b 100644 --- a/drivers/power/supply/axp20x_ac_power.c +++ b/drivers/power/supply/axp20x_ac_power.c @@ -23,6 +23,8 @@ #define AXP20X_PWR_STATUS_ACIN_PRESENT BIT(7) #define AXP20X_PWR_STATUS_ACIN_AVAIL BIT(6) +#define AXP813_ACIN_PATH_SEL BIT(7) + #define AXP813_VHOLD_MASK GENMASK(5, 3) #define AXP813_VHOLD_UV_TO_BIT(x) ((((x) / 100000) - 40) << 3) #define AXP813_VHOLD_REG_TO_UV(x) \ @@ -40,6 +42,7 @@ struct axp20x_ac_power { struct power_supply *supply; struct iio_channel *acin_v; struct iio_channel *acin_i; + bool has_acin_path_sel; }; static irqreturn_t axp20x_ac_power_irq(int irq, void *devid) @@ -86,6 +89,17 @@ static int axp20x_ac_power_get_property(struct power_supply *psy, return ret; val->intval = !!(reg & AXP20X_PWR_STATUS_ACIN_AVAIL); + + /* ACIN_PATH_SEL disables ACIN even if ACIN_AVAIL is set. 
*/ + if (val->intval && power->has_acin_path_sel) { + ret = regmap_read(power->regmap, AXP813_ACIN_PATH_CTRL, + &reg); + if (ret) + return ret; + + val->intval = !!(reg & AXP813_ACIN_PATH_SEL); + } + return 0; case POWER_SUPPLY_PROP_VOLTAGE_NOW: @@ -224,21 +238,25 @@ static const struct power_supply_desc axp813_ac_power_desc = { struct axp_data { const struct power_supply_desc *power_desc; bool acin_adc; + bool acin_path_sel; }; static const struct axp_data axp20x_data = { - .power_desc = &axp20x_ac_power_desc, - .acin_adc = true, + .power_desc = &axp20x_ac_power_desc, + .acin_adc = true, + .acin_path_sel = false, }; static const struct axp_data axp22x_data = { - .power_desc = &axp22x_ac_power_desc, - .acin_adc = false, + .power_desc = &axp22x_ac_power_desc, + .acin_adc = false, + .acin_path_sel = false, }; static const struct axp_data axp813_data = { - .power_desc = &axp813_ac_power_desc, - .acin_adc = false, + .power_desc = &axp813_ac_power_desc, + .acin_adc = false, + .acin_path_sel = true, }; static int axp20x_ac_power_probe(struct platform_device *pdev) @@ -282,6 +300,7 @@ static int axp20x_ac_power_probe(struct platform_device *pdev) } power->regmap = dev_get_regmap(pdev->dev.parent, NULL); + power->has_acin_path_sel = axp_data->acin_path_sel; platform_set_drvdata(pdev, power); diff --git a/drivers/power/supply/ingenic-battery.c b/drivers/power/supply/ingenic-battery.c index 35816d4b3012..2748715c4c75 100644 --- a/drivers/power/supply/ingenic-battery.c +++ b/drivers/power/supply/ingenic-battery.c @@ -100,10 +100,17 @@ static int ingenic_battery_set_scale(struct ingenic_battery *bat) return -EINVAL; } - return iio_write_channel_attribute(bat->channel, - scale_raw[best_idx], - scale_raw[best_idx + 1], - IIO_CHAN_INFO_SCALE); + /* Only set scale if there is more than one (fractional) entry */ + if (scale_len > 2) { + ret = iio_write_channel_attribute(bat->channel, + scale_raw[best_idx], + scale_raw[best_idx + 1], + IIO_CHAN_INFO_SCALE); + if (ret) + return ret; + } + + return 0; } static enum power_supply_property ingenic_battery_properties[] = { diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index da49436176cd..30a9014b2f95 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ -449,7 +449,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client) { struct ltc294x_info *info = i2c_get_clientdata(client); - cancel_delayed_work(&info->work); + cancel_delayed_work_sync(&info->work); power_supply_unregister(info->supply); return 0; } diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 00772fc53490..88a3c5690fea 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -256,7 +256,7 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) if (!timer_pdev) { dev_err(&pdev->dev, "Unable to find Timer pdev\n"); ret = -ENODEV; - goto put; + goto err_find_timer_pdev; } timer_pdata = dev_get_platdata(&timer_pdev->dev); @@ -264,7 +264,7 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "dmtimer pdata structure NULL, deferring probe\n"); ret = -EPROBE_DEFER; - goto put; + goto err_platdata; } pdata = timer_pdata->timer_ops; @@ -283,30 +283,25 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) !pdata->write_counter) { dev_err(&pdev->dev, "Incomplete dmtimer pdata structure\n"); ret = -EINVAL; - goto put; + goto err_platdata; } if (!of_get_property(timer,
"ti,timer-pwm", NULL)) { dev_err(&pdev->dev, "Missing ti,timer-pwm capability\n"); ret = -ENODEV; - goto put; + goto err_timer_property; } dm_timer = pdata->request_by_node(timer); if (!dm_timer) { ret = -EPROBE_DEFER; - goto put; + goto err_request_timer; } -put: - of_node_put(timer); - if (ret < 0) - return ret; - omap = devm_kzalloc(&pdev->dev, sizeof(*omap), GFP_KERNEL); if (!omap) { - pdata->free(dm_timer); - return -ENOMEM; + ret = -ENOMEM; + goto err_alloc_omap; } omap->pdata = pdata; @@ -339,27 +334,56 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) ret = pwmchip_add(&omap->chip); if (ret < 0) { dev_err(&pdev->dev, "failed to register PWM\n"); - omap->pdata->free(omap->dm_timer); - return ret; + goto err_pwmchip_add; } + of_node_put(timer); + platform_set_drvdata(pdev, omap); return 0; + +err_pwmchip_add: + + /* + * *omap is allocated using devm_kzalloc, + * so no free necessary here + */ +err_alloc_omap: + + pdata->free(dm_timer); +err_request_timer: + +err_timer_property: +err_platdata: + + put_device(&timer_pdev->dev); +err_find_timer_pdev: + + of_node_put(timer); + + return ret; } static int pwm_omap_dmtimer_remove(struct platform_device *pdev) { struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&omap->chip); + if (ret) + return ret; if (pm_runtime_active(&omap->dm_timer_pdev->dev)) omap->pdata->stop(omap->dm_timer); omap->pdata->free(omap->dm_timer); + put_device(&omap->dm_timer_pdev->dev); + mutex_destroy(&omap->mutex); - return pwmchip_remove(&omap->chip); + return 0; } static const struct of_device_id pwm_omap_dmtimer_of_match[] = { diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index 168684b02ebc..b07bdca3d510 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -159,13 +159,9 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset, static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); - mutex_lock(&pca->lock); - pwm = &pca->chip.pwms[offset]; - mutex_unlock(&pca->lock); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 03d79fee2987..d015d99cb59d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3470,6 +3470,7 @@ int regulator_set_voltage_rdev(struct regulator_dev *rdev, int min_uV, out: return ret; } +EXPORT_SYMBOL_GPL(regulator_set_voltage_rdev); static int regulator_limit_voltage_step(struct regulator_dev *rdev, int *current_uV, int *min_uV) @@ -4034,6 +4035,7 @@ int regulator_get_voltage_rdev(struct regulator_dev *rdev) return ret; return ret - rdev->constraints->uV_offset; } +EXPORT_SYMBOL_GPL(regulator_get_voltage_rdev); /** * regulator_get_voltage - get regulator output voltage diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c index ca3dc3f3bb29..bb16c465426e 100644 --- a/drivers/regulator/helpers.c +++ b/drivers/regulator/helpers.c @@ -13,6 +13,8 @@ #include #include +#include "internal.h" + /** * regulator_is_enabled_regmap - standard is_enabled() for regmap users * @@ -881,3 +883,15 @@ void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, consumers[i].supply = supply_names[i]; } EXPORT_SYMBOL_GPL(regulator_bulk_set_supply_names); + +/** + * regulator_is_equal - test whether two regulators are the same + * + * 
@reg1: first regulator to operate on + * @reg2: second regulator to operate on + */ +bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2) +{ + return reg1->rdev == reg2->rdev; +} +EXPORT_SYMBOL_GPL(regulator_is_equal); diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 5b4003226484..31f79fda3238 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -1282,7 +1282,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev, } if (!pdata->dvs_gpio[i]) { - dev_warn(dev, "there is no dvs%d gpio\n", i); + dev_info(dev, "there is no dvs%d gpio\n", i); continue; } diff --git a/drivers/regulator/stm32-vrefbuf.c b/drivers/regulator/stm32-vrefbuf.c index bdfaf7edb75a..992bc18101ef 100644 --- a/drivers/regulator/stm32-vrefbuf.c +++ b/drivers/regulator/stm32-vrefbuf.c @@ -88,7 +88,7 @@ static int stm32_vrefbuf_disable(struct regulator_dev *rdev) } val = readl_relaxed(priv->base + STM32_VREFBUF_CSR); - val = (val & ~STM32_ENVR) | STM32_HIZ; + val &= ~STM32_ENVR; writel_relaxed(val, priv->base + STM32_VREFBUF_CSR); pm_runtime_mark_last_busy(priv->dev); @@ -175,6 +175,7 @@ static const struct regulator_desc stm32_vrefbuf_regu = { .volt_table = stm32_vrefbuf_voltages, .n_voltages = ARRAY_SIZE(stm32_vrefbuf_voltages), .ops = &stm32_vrefbuf_volt_ops, + .off_on_delay = 1000, .type = REGULATOR_VOLTAGE, .owner = THIS_MODULE, }; diff --git a/drivers/regulator/vctrl-regulator.c b/drivers/regulator/vctrl-regulator.c index 9a9ee8188109..cbadb1c99679 100644 --- a/drivers/regulator/vctrl-regulator.c +++ b/drivers/regulator/vctrl-regulator.c @@ -11,10 +11,13 @@ #include #include #include +#include #include #include #include +#include "internal.h" + struct vctrl_voltage_range { int min_uV; int max_uV; @@ -79,7 +82,7 @@ static int vctrl_calc_output_voltage(struct vctrl_data *vctrl, int ctrl_uV) static int vctrl_get_voltage(struct regulator_dev *rdev) { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); - int ctrl_uV = regulator_get_voltage(vctrl->ctrl_reg); + int ctrl_uV = regulator_get_voltage_rdev(vctrl->ctrl_reg->rdev); return vctrl_calc_output_voltage(vctrl, ctrl_uV); } @@ -90,16 +93,16 @@ static int vctrl_set_voltage(struct regulator_dev *rdev, { struct vctrl_data *vctrl = rdev_get_drvdata(rdev); struct regulator *ctrl_reg = vctrl->ctrl_reg; - int orig_ctrl_uV = regulator_get_voltage(ctrl_reg); + int orig_ctrl_uV = regulator_get_voltage_rdev(ctrl_reg->rdev); int uV = vctrl_calc_output_voltage(vctrl, orig_ctrl_uV); int ret; if (req_min_uV >= uV || !vctrl->ovp_threshold) /* voltage rising or no OVP */ - return regulator_set_voltage( - ctrl_reg, + return regulator_set_voltage_rdev(ctrl_reg->rdev, vctrl_calc_ctrl_voltage(vctrl, req_min_uV), - vctrl_calc_ctrl_voltage(vctrl, req_max_uV)); + vctrl_calc_ctrl_voltage(vctrl, req_max_uV), + PM_SUSPEND_ON); while (uV > req_min_uV) { int max_drop_uV = (uV * vctrl->ovp_threshold) / 100; @@ -114,9 +117,10 @@ static int vctrl_set_voltage(struct regulator_dev *rdev, next_uV = max_t(int, req_min_uV, uV - max_drop_uV); next_ctrl_uV = vctrl_calc_ctrl_voltage(vctrl, next_uV); - ret = regulator_set_voltage(ctrl_reg, + ret = regulator_set_voltage_rdev(ctrl_reg->rdev, + next_ctrl_uV, next_ctrl_uV, - next_ctrl_uV); + PM_SUSPEND_ON); if (ret) goto err; @@ -130,7 +134,8 @@ static int vctrl_set_voltage(struct regulator_dev *rdev, err: /* Try to go back to original voltage */ - regulator_set_voltage(ctrl_reg, orig_ctrl_uV, orig_ctrl_uV); + regulator_set_voltage_rdev(ctrl_reg->rdev, 
orig_ctrl_uV, orig_ctrl_uV, + PM_SUSPEND_ON); return ret; } @@ -155,9 +160,10 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev, if (selector >= vctrl->sel || !vctrl->ovp_threshold) { /* voltage rising or no OVP */ - ret = regulator_set_voltage(ctrl_reg, + ret = regulator_set_voltage_rdev(ctrl_reg->rdev, + vctrl->vtable[selector].ctrl, vctrl->vtable[selector].ctrl, - vctrl->vtable[selector].ctrl); + PM_SUSPEND_ON); if (!ret) vctrl->sel = selector; @@ -173,9 +179,10 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev, else next_sel = vctrl->vtable[vctrl->sel].ovp_min_sel; - ret = regulator_set_voltage(ctrl_reg, + ret = regulator_set_voltage_rdev(ctrl_reg->rdev, vctrl->vtable[next_sel].ctrl, - vctrl->vtable[next_sel].ctrl); + vctrl->vtable[next_sel].ctrl, + PM_SUSPEND_ON); if (ret) { dev_err(&rdev->dev, "failed to set control voltage to %duV\n", @@ -195,9 +202,10 @@ static int vctrl_set_voltage_sel(struct regulator_dev *rdev, err: if (vctrl->sel != orig_sel) { /* Try to go back to original voltage */ - if (!regulator_set_voltage(ctrl_reg, + if (!regulator_set_voltage_rdev(ctrl_reg->rdev, + vctrl->vtable[orig_sel].ctrl, vctrl->vtable[orig_sel].ctrl, - vctrl->vtable[orig_sel].ctrl)) + PM_SUSPEND_ON)) vctrl->sel = orig_sel; else dev_warn(&rdev->dev, @@ -482,7 +490,7 @@ static int vctrl_probe(struct platform_device *pdev) if (ret) return ret; - ctrl_uV = regulator_get_voltage(vctrl->ctrl_reg); + ctrl_uV = regulator_get_voltage_rdev(vctrl->ctrl_reg->rdev); if (ctrl_uV < 0) { dev_err(&pdev->dev, "failed to get control voltage\n"); return ctrl_uV; diff --git a/drivers/remoteproc/qcom_q6v5_mss.c b/drivers/remoteproc/qcom_q6v5_mss.c index 471128a2e723..164fc2a53ef1 100644 --- a/drivers/remoteproc/qcom_q6v5_mss.c +++ b/drivers/remoteproc/qcom_q6v5_mss.c @@ -1594,7 +1594,6 @@ static const struct rproc_hexagon_res msm8998_mss = { .active_clk_names = (char*[]){ "iface", "bus", - "mem", "gpll0_mss", "mnoc_axi", "snoc_axi", diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 307df98347ba..8115f945151b 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -2223,7 +2223,7 @@ static int __init remoteproc_init(void) return 0; } -module_init(remoteproc_init); +subsys_initcall(remoteproc_init); static void __exit remoteproc_exit(void) { diff --git a/drivers/reset/reset-uniphier.c b/drivers/reset/reset-uniphier.c index 74e589f5dd6a..279e535bf5d8 100644 --- a/drivers/reset/reset-uniphier.c +++ b/drivers/reset/reset-uniphier.c @@ -193,8 +193,8 @@ static const struct uniphier_reset_data uniphier_pro5_sd_reset_data[] = { #define UNIPHIER_PERI_RESET_FI2C(id, ch) \ UNIPHIER_RESETX((id), 0x114, 24 + (ch)) -#define UNIPHIER_PERI_RESET_SCSSI(id) \ - UNIPHIER_RESETX((id), 0x110, 17) +#define UNIPHIER_PERI_RESET_SCSSI(id, ch) \ + UNIPHIER_RESETX((id), 0x110, 17 + (ch)) #define UNIPHIER_PERI_RESET_MCSSI(id) \ UNIPHIER_RESETX((id), 0x114, 14) @@ -209,7 +209,7 @@ static const struct uniphier_reset_data uniphier_ld4_peri_reset_data[] = { UNIPHIER_PERI_RESET_I2C(6, 2), UNIPHIER_PERI_RESET_I2C(7, 3), UNIPHIER_PERI_RESET_I2C(8, 4), - UNIPHIER_PERI_RESET_SCSSI(11), + UNIPHIER_PERI_RESET_SCSSI(11, 0), UNIPHIER_RESET_END, }; @@ -225,8 +225,11 @@ static const struct uniphier_reset_data uniphier_pro4_peri_reset_data[] = { UNIPHIER_PERI_RESET_FI2C(8, 4), UNIPHIER_PERI_RESET_FI2C(9, 5), UNIPHIER_PERI_RESET_FI2C(10, 6), - UNIPHIER_PERI_RESET_SCSSI(11), - UNIPHIER_PERI_RESET_MCSSI(12), + UNIPHIER_PERI_RESET_SCSSI(11, 0), + 
UNIPHIER_PERI_RESET_SCSSI(12, 1), + UNIPHIER_PERI_RESET_SCSSI(13, 2), + UNIPHIER_PERI_RESET_SCSSI(14, 3), + UNIPHIER_PERI_RESET_MCSSI(15), UNIPHIER_RESET_END, }; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index d77515d8382c..91a1b13fbf71 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -240,6 +240,7 @@ config RTC_DRV_AS3722 config RTC_DRV_DS1307 tristate "Dallas/Maxim DS1307/37/38/39/40/41, ST M41T00, EPSON RX-8025, ISL12057" + select REGMAP_I2C help If you say yes here you get support for various compatible RTC chips (often with battery backup) connected with I2C. This driver @@ -326,6 +327,7 @@ config RTC_DRV_MAX6900 config RTC_DRV_MAX8907 tristate "Maxim MAX8907" depends on MFD_MAX8907 || COMPILE_TEST + select REGMAP_IRQ help If you say yes here you will get support for the RTC of Maxim MAX8907 PMIC. @@ -621,6 +623,7 @@ config RTC_DRV_RX8010 config RTC_DRV_RX8581 tristate "Epson RX-8571/RX-8581" + select REGMAP_I2C help If you say yes here you will get support for the Epson RX-8571/ RX-8581. @@ -648,6 +651,7 @@ config RTC_DRV_EM3027 config RTC_DRV_RV3028 tristate "Micro Crystal RV3028" + select REGMAP_I2C help If you say yes here you get support for the Micro Crystal RV3028. @@ -677,6 +681,7 @@ config RTC_DRV_S5M config RTC_DRV_SD3078 tristate "ZXW Shenzhen whwave SD3078" + select REGMAP_I2C help If you say yes here you get support for the ZXW Shenzhen whwave SD3078 RTC chips. @@ -848,14 +853,14 @@ config RTC_I2C_AND_SPI default m if I2C=m default y if I2C=y default y if SPI_MASTER=y - select REGMAP_I2C if I2C - select REGMAP_SPI if SPI_MASTER comment "SPI and I2C RTC drivers" config RTC_DRV_DS3232 tristate "Dallas/Maxim DS3232/DS3234" depends on RTC_I2C_AND_SPI + select REGMAP_I2C if I2C + select REGMAP_SPI if SPI_MASTER help If you say yes here you get support for Dallas Semiconductor DS3232 and DS3234 real-time clock chips. If an interrupt is associated @@ -875,6 +880,8 @@ config RTC_DRV_DS3232_HWMON config RTC_DRV_PCF2127 tristate "NXP PCF2127" depends on RTC_I2C_AND_SPI + select REGMAP_I2C if I2C + select REGMAP_SPI if SPI_MASTER select WATCHDOG_CORE if WATCHDOG help If you say yes here you get support for the NXP PCF2127/29 RTC @@ -891,6 +898,8 @@ config RTC_DRV_PCF2127 config RTC_DRV_RV3029C2 tristate "Micro Crystal RV3029/3049" depends on RTC_I2C_AND_SPI + select REGMAP_I2C if I2C + select REGMAP_SPI if SPI_MASTER help If you say yes here you get support for the Micro Crystal RV3029 and RV3049 RTC chips. 
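The REGMAP_I2C/REGMAP_SPI selects added to the RTC Kconfig entries above exist because the affected drivers build their register access on the regmap core: devm_regmap_init_i2c() and its SPI counterpart are only compiled when the corresponding REGMAP_* symbol is enabled, so enabling one of these drivers without the select ends in an undefined-reference link error. A minimal sketch of the pattern these drivers share follows; the example_rtc names are hypothetical stand-ins, only the regmap calls are the real API:

	#include <linux/err.h>
	#include <linux/i2c.h>
	#include <linux/regmap.h>

	/*
	 * Hypothetical probe illustrating the dependency: without
	 * CONFIG_REGMAP_I2C there is no devm_regmap_init_i2c()
	 * implementation to link against, which is exactly what the
	 * added "select REGMAP_I2C" lines prevent.
	 */
	static const struct regmap_config example_rtc_regmap_config = {
		.reg_bits = 8,	/* 8-bit register addresses, typical for these RTCs */
		.val_bits = 8,	/* 8-bit register values */
	};

	static int example_rtc_probe(struct i2c_client *client,
				     const struct i2c_device_id *id)
	{
		struct regmap *map;

		map = devm_regmap_init_i2c(client, &example_rtc_regmap_config);
		if (IS_ERR(map))
			return PTR_ERR(map);

		/* ... set up and register the rtc_device using 'map' ... */
		return 0;
	}

The RTC_I2C_AND_SPI hunk follows the same logic from the other direction: instead of the umbrella symbol selecting both regmap backends unconditionally, the selects now live with the individual drivers (DS3232, PCF2127, RV3029C2) that actually use them.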
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 033303708c8b..cb28bbdc9e17 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -850,7 +850,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) rtc_cmos_int_handler = cmos_interrupt; retval = request_irq(rtc_irq, rtc_cmos_int_handler, - IRQF_SHARED, dev_name(&cmos_rtc.rtc->dev), + 0, dev_name(&cmos_rtc.rtc->dev), cmos_rtc.rtc); if (retval < 0) { dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index 443f6d05ce29..fb6d7967ec00 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -97,7 +97,7 @@ static int hym8563_rtc_read_time(struct device *dev, struct rtc_time *tm) if (!hym8563->valid) { dev_warn(&client->dev, "no valid clock/calendar values available\n"); - return -EPERM; + return -EINVAL; } ret = i2c_smbus_read_i2c_block_data(client, HYM8563_SEC, 7, buf); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 9135e2101752..cda238dfe69b 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -297,15 +297,7 @@ static int mtk_rtc_probe(struct platform_device *pdev) rtc->rtc_dev->ops = &mtk_rtc_ops; - ret = rtc_register_device(rtc->rtc_dev); - if (ret) - goto out_free_irq; - - return 0; - -out_free_irq: - free_irq(rtc->irq, rtc); - return ret; + return rtc_register_device(rtc->rtc_dev); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 6cca72782af6..cf87eb27879f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void) (unsigned long) block); INIT_LIST_HEAD(&block->ccw_queue); spin_lock_init(&block->queue_lock); + INIT_LIST_HEAD(&block->format_list); + spin_lock_init(&block->format_lock); timer_setup(&block->timer, dasd_block_timeout, 0); spin_lock_init(&block->profile.lock); @@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, if (dasd_ese_needs_format(cqr->block, irb)) { if (rq_data_dir((struct request *)cqr->callback_data) == READ) { - device->discipline->ese_read(cqr); + device->discipline->ese_read(cqr, irb); cqr->status = DASD_CQR_SUCCESS; cqr->stopclk = now; dasd_device_clear_timer(device); dasd_schedule_device_bh(device); return; } - fcqr = device->discipline->ese_format(device, cqr); + fcqr = device->discipline->ese_format(device, cqr, irb); if (IS_ERR(fcqr)) { + if (PTR_ERR(fcqr) == -EINVAL) { + cqr->status = DASD_CQR_ERROR; + return; + } /* * If we can't format now, let the request go * one extra round. Maybe we can format later. */ cqr->status = DASD_CQR_QUEUED; + dasd_schedule_device_bh(device); + return; } else { fcqr->status = DASD_CQR_QUEUED; cqr->status = DASD_CQR_QUEUED; @@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) { struct request *req; blk_status_t error = BLK_STS_OK; + unsigned int proc_bytes; int status; req = (struct request *) cqr->callback_data; dasd_profile_end(cqr->block, cqr, req); + proc_bytes = cqr->proc_bytes; status = cqr->block->base->discipline->free_cp(cqr, req); if (status < 0) error = errno_to_blk_status(status); @@ -2783,7 +2793,18 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) blk_mq_end_request(req, error); blk_mq_run_hw_queues(req->q, true); } else { - blk_mq_complete_request(req); + /* + * Partial completed requests can happen with ESE devices. 
+ * During read we might have gotten a NRF error and have to + * complete a request partially. + */ + if (proc_bytes) { + blk_update_request(req, BLK_STS_OK, + blk_rq_bytes(req) - proc_bytes); + blk_mq_requeue_request(req, true); + } else { + blk_mq_complete_request(req); + } } } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index a28b9ff82378..ad44d22e8859 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head) geo->head |= head; } +/* + * calculate failing track from sense data depending if + * it is an EAV device or not + */ +static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device, + sector_t *track) +{ + struct dasd_eckd_private *private = device->private; + u8 *sense = NULL; + u32 cyl; + u8 head; + + sense = dasd_get_sense(irb); + if (!sense) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "ESE error no sense data\n"); + return -EINVAL; + } + if (!(sense[27] & DASD_SENSE_BIT_2)) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "ESE error no valid track data\n"); + return -EINVAL; + } + + if (sense[27] & DASD_SENSE_BIT_3) { + /* enhanced addressing */ + cyl = sense[30] << 20; + cyl |= (sense[31] & 0xF0) << 12; + cyl |= sense[28] << 8; + cyl |= sense[29]; + } else { + cyl = sense[29] << 8; + cyl |= sense[30]; + } + head = sense[31] & 0x0F; + *track = cyl * private->rdc_data.trk_per_cyl + head; + return 0; +} + static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data, struct dasd_device *device) { @@ -2986,6 +3025,37 @@ static int dasd_eckd_format_device(struct dasd_device *base, 0, NULL); } +static bool test_and_set_format_track(struct dasd_format_entry *to_format, + struct dasd_block *block) +{ + struct dasd_format_entry *format; + unsigned long flags; + bool rc = false; + + spin_lock_irqsave(&block->format_lock, flags); + list_for_each_entry(format, &block->format_list, list) { + if (format->track == to_format->track) { + rc = true; + goto out; + } + } + list_add_tail(&to_format->list, &block->format_list); + +out: + spin_unlock_irqrestore(&block->format_lock, flags); + return rc; +} + +static void clear_format_track(struct dasd_format_entry *format, + struct dasd_block *block) +{ + unsigned long flags; + + spin_lock_irqsave(&block->format_lock, flags); + list_del_init(&format->list); + spin_unlock_irqrestore(&block->format_lock, flags); +} + /* * Callback function to free ESE format requests. 
*/ @@ -2993,15 +3063,19 @@ static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data) { struct dasd_device *device = cqr->startdev; struct dasd_eckd_private *private = device->private; + struct dasd_format_entry *format = data; + clear_format_track(format, cqr->basedev->block); private->count--; dasd_ffree_request(cqr, device); } static struct dasd_ccw_req * -dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) +dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, + struct irb *irb) { struct dasd_eckd_private *private; + struct dasd_format_entry *format; struct format_data_t fdata; unsigned int recs_per_trk; struct dasd_ccw_req *fcqr; @@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) struct request *req; sector_t first_trk; sector_t last_trk; + sector_t curr_trk; int rc; req = cqr->callback_data; - base = cqr->block->base; + block = cqr->block; + base = block->base; private = base->private; - block = base->block; blksize = block->bp_block; recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize); + format = &startdev->format_entry; first_trk = blk_rq_pos(req) >> block->s2b_shift; sector_div(first_trk, recs_per_trk); last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; sector_div(last_trk, recs_per_trk); + rc = dasd_eckd_track_from_irb(irb, base, &curr_trk); + if (rc) + return ERR_PTR(rc); - fdata.start_unit = first_trk; - fdata.stop_unit = last_trk; + if (curr_trk < first_trk || curr_trk > last_trk) { + DBF_DEV_EVENT(DBF_WARNING, startdev, + "ESE error track %llu not within range %llu - %llu\n", + curr_trk, first_trk, last_trk); + return ERR_PTR(-EINVAL); + } + format->track = curr_trk; + /* test if track is already in formatting by another thread */ + if (test_and_set_format_track(format, block)) + return ERR_PTR(-EEXIST); + + fdata.start_unit = curr_trk; + fdata.stop_unit = curr_trk; fdata.blksize = blksize; fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0; @@ -3044,6 +3134,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) return fcqr; fcqr->callback = dasd_eckd_ese_format_cb; + fcqr->callback_data = (void *) format; return fcqr; } @@ -3051,29 +3142,87 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) /* * When data is read from an unformatted area of an ESE volume, this function * returns zeroed data and thereby mimics a read of zero data. + * + * The first unformatted track is the one that got the NRF error, the address is + * encoded in the sense data. + * + * All tracks before have returned valid data and should not be touched. + * All tracks after the unformatted track might be formatted or not. This is + * currently not known, remember the processed data and return the remainder of + * the request to the blocklayer in __dasd_cleanup_cqr(). 
*/ -static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr) +static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb) { + struct dasd_eckd_private *private; + sector_t first_trk, last_trk; + sector_t first_blk, last_blk; unsigned int blksize, off; + unsigned int recs_per_trk; struct dasd_device *base; struct req_iterator iter; + struct dasd_block *block; + unsigned int skip_block; + unsigned int blk_count; struct request *req; struct bio_vec bv; + sector_t curr_trk; + sector_t end_blk; char *dst; + int rc; req = (struct request *) cqr->callback_data; base = cqr->block->base; blksize = base->block->bp_block; + block = cqr->block; + private = base->private; + skip_block = 0; + blk_count = 0; + + recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize); + first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift; + sector_div(first_trk, recs_per_trk); + last_trk = last_blk = + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; + sector_div(last_trk, recs_per_trk); + rc = dasd_eckd_track_from_irb(irb, base, &curr_trk); + if (rc) + return rc; + + /* sanity check if the current track from sense data is valid */ + if (curr_trk < first_trk || curr_trk > last_trk) { + DBF_DEV_EVENT(DBF_WARNING, base, + "ESE error track %llu not within range %llu - %llu\n", + curr_trk, first_trk, last_trk); + return -EINVAL; + } + + /* + * if not the first track got the NRF error we have to skip over valid + * blocks + */ + if (curr_trk != first_trk) + skip_block = curr_trk * recs_per_trk - first_blk; + + /* we have no information beyond the current track */ + end_blk = (curr_trk + 1) * recs_per_trk; rq_for_each_segment(bv, req, iter) { dst = page_address(bv.bv_page) + bv.bv_offset; for (off = 0; off < bv.bv_len; off += blksize) { - if (dst && rq_data_dir(req) == READ) { + if (first_blk + blk_count >= end_blk) { + cqr->proc_bytes = blk_count * blksize; + return 0; + } + if (dst && !skip_block) { dst += off; memset(dst, 0, blksize); + } else { + skip_block--; } + blk_count++; } } + return 0; } /* diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 91c9f9586e0f..fa552f9f1666 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -187,6 +187,7 @@ struct dasd_ccw_req { void (*callback)(struct dasd_ccw_req *, void *data); void *callback_data; + unsigned int proc_bytes; /* bytes for partial completion */ }; /* @@ -387,8 +388,9 @@ struct dasd_discipline { int (*ext_pool_warn_thrshld)(struct dasd_device *); int (*ext_pool_oos)(struct dasd_device *); int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *); - struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *); - void (*ese_read)(struct dasd_ccw_req *); + struct dasd_ccw_req *(*ese_format)(struct dasd_device *, + struct dasd_ccw_req *, struct irb *); + int (*ese_read)(struct dasd_ccw_req *, struct irb *); }; extern struct dasd_discipline *dasd_diag_discipline_pointer; @@ -474,6 +476,11 @@ struct dasd_profile { spinlock_t lock; }; +struct dasd_format_entry { + struct list_head list; + sector_t track; +}; + struct dasd_device { /* Block device stuff. 
*/ struct dasd_block *block; @@ -539,6 +546,7 @@ struct dasd_device { struct dentry *debugfs_dentry; struct dentry *hosts_dentry; struct dasd_profile profile; + struct dasd_format_entry format_entry; }; struct dasd_block { @@ -564,6 +572,9 @@ struct dasd_block { struct dentry *debugfs_dentry; struct dasd_profile profile; + + struct list_head format_list; + spinlock_t format_lock; }; struct dasd_attention_data { diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2a3f874a21d5..9cebff8e8d74 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -303,8 +303,10 @@ static void * cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) { struct ccwdev_iter *iter; + loff_t p = *offset; - if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + (*offset)++; + if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) return NULL; iter = it; if (iter->devno == __MAX_SUBCHANNEL) { @@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) return NULL; } else iter->devno++; - (*offset)++; return iter; } diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index dc430bd86ade..58eaac70dba7 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "cio.h" @@ -205,7 +206,7 @@ static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, /* fill in sl */ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) - q->sl->element[j].sbal = (unsigned long)q->sbal[j]; + q->sl->element[j].sbal = virt_to_phys(q->sbal[j]); } static void setup_queues(struct qdio_irq *irq_ptr, diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index bb35ba4a8d24..4348fdff1c61 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -162,7 +162,7 @@ struct ap_card { unsigned int functions; /* AP device function bitfield. */ int queue_depth; /* AP queue depth.*/ int id; /* AP card number. */ - atomic_t total_request_count; /* # requests ever for this AP device.*/ + atomic64_t total_request_count; /* # requests ever for this AP device.*/ }; #define to_ap_card(x) container_of((x), struct ap_card, ap_dev.device) @@ -179,7 +179,7 @@ struct ap_queue { enum ap_state state; /* State of the AP device. */ int pendingq_count; /* # requests on pendingq list. */ int requestq_count; /* # requests on requestq list. */ - int total_request_count; /* # requests ever for this AP device.*/ + u64 total_request_count; /* # requests ever for this AP device.*/ int request_timeout; /* Request timeout in jiffies. */ struct timer_list timeout; /* Timer for request timeouts. */ struct list_head pendingq; /* List of message sent to AP queue. 
*/ diff --git a/drivers/s390/crypto/ap_card.c b/drivers/s390/crypto/ap_card.c index 63b4cc6cd7e5..e85bfca1ed16 100644 --- a/drivers/s390/crypto/ap_card.c +++ b/drivers/s390/crypto/ap_card.c @@ -63,13 +63,13 @@ static ssize_t request_count_show(struct device *dev, char *buf) { struct ap_card *ac = to_ap_card(dev); - unsigned int req_cnt; + u64 req_cnt; req_cnt = 0; spin_lock_bh(&ap_list_lock); - req_cnt = atomic_read(&ac->total_request_count); + req_cnt = atomic64_read(&ac->total_request_count); spin_unlock_bh(&ap_list_lock); - return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt); + return snprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); } static ssize_t request_count_store(struct device *dev, @@ -83,7 +83,7 @@ static ssize_t request_count_store(struct device *dev, for_each_ap_queue(aq, ac) aq->total_request_count = 0; spin_unlock_bh(&ap_list_lock); - atomic_set(&ac->total_request_count, 0); + atomic64_set(&ac->total_request_count, 0); return count; } diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 37c3bdc3642d..a317ab484932 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -479,12 +479,12 @@ static ssize_t request_count_show(struct device *dev, char *buf) { struct ap_queue *aq = to_ap_queue(dev); - unsigned int req_cnt; + u64 req_cnt; spin_lock_bh(&aq->lock); req_cnt = aq->total_request_count; spin_unlock_bh(&aq->lock); - return snprintf(buf, PAGE_SIZE, "%d\n", req_cnt); + return snprintf(buf, PAGE_SIZE, "%llu\n", req_cnt); } static ssize_t request_count_store(struct device *dev, @@ -676,7 +676,7 @@ void ap_queue_message(struct ap_queue *aq, struct ap_message *ap_msg) list_add_tail(&ap_msg->list, &aq->requestq); aq->requestq_count++; aq->total_request_count++; - atomic_inc(&aq->card->total_request_count); + atomic64_inc(&aq->card->total_request_count); /* Send/receive as many request from the queue as possible. 
*/ ap_wait(ap_sm_event_loop(aq, AP_EVENT_POLL)); spin_unlock_bh(&aq->lock); diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c index d78d77686d7b..cda75118ccdb 100644 --- a/drivers/s390/crypto/pkey_api.c +++ b/drivers/s390/crypto/pkey_api.c @@ -774,7 +774,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd, return -EFAULT; rc = cca_sec2protkey(ksp.cardnr, ksp.domain, ksp.seckey.seckey, ksp.protkey.protkey, - NULL, &ksp.protkey.type); + &ksp.protkey.len, &ksp.protkey.type); DEBUG_DBG("%s cca_sec2protkey()=%d\n", __func__, rc); if (rc) break; diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 9157e728a362..7fa0262e91af 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -605,8 +605,8 @@ static inline bool zcrypt_card_compare(struct zcrypt_card *zc, weight += atomic_read(&zc->load); pref_weight += atomic_read(&pref_zc->load); if (weight == pref_weight) - return atomic_read(&zc->card->total_request_count) > - atomic_read(&pref_zc->card->total_request_count); + return atomic64_read(&zc->card->total_request_count) > + atomic64_read(&pref_zc->card->total_request_count); return weight > pref_weight; } @@ -1216,11 +1216,12 @@ static void zcrypt_qdepth_mask(char qdepth[], size_t max_adapters) spin_unlock(&zcrypt_list_lock); } -static void zcrypt_perdev_reqcnt(int reqcnt[], size_t max_adapters) +static void zcrypt_perdev_reqcnt(u32 reqcnt[], size_t max_adapters) { struct zcrypt_card *zc; struct zcrypt_queue *zq; int card; + u64 cnt; memset(reqcnt, 0, sizeof(int) * max_adapters); spin_lock(&zcrypt_list_lock); @@ -1232,8 +1233,9 @@ static void zcrypt_perdev_reqcnt(int reqcnt[], size_t max_adapters) || card >= max_adapters) continue; spin_lock(&zq->queue->lock); - reqcnt[card] = zq->queue->total_request_count; + cnt = zq->queue->total_request_count; spin_unlock(&zq->queue->lock); + reqcnt[card] = (cnt < UINT_MAX) ? (u32) cnt : UINT_MAX; } } local_bh_enable(); @@ -1411,9 +1413,9 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, return 0; } case ZCRYPT_PERDEV_REQCNT: { - int *reqcnt; + u32 *reqcnt; - reqcnt = kcalloc(AP_DEVICES, sizeof(int), GFP_KERNEL); + reqcnt = kcalloc(AP_DEVICES, sizeof(u32), GFP_KERNEL); if (!reqcnt) return -ENOMEM; zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES); @@ -1470,7 +1472,7 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, } case Z90STAT_PERDEV_REQCNT: { /* the old ioctl supports only 64 adapters */ - int reqcnt[MAX_ZDEV_CARDIDS]; + u32 reqcnt[MAX_ZDEV_CARDIDS]; zcrypt_perdev_reqcnt(reqcnt, MAX_ZDEV_CARDIDS); if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt))) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 29facb913671..d2420707518c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1244,7 +1244,6 @@ static int qeth_osa_set_output_queues(struct qeth_card *card, bool single) if (count == 1) dev_info(&card->gdev->dev, "Priority Queueing not supported\n"); - card->qdio.default_out_queue = single ? 
0 : QETH_DEFAULT_QUEUE; card->qdio.no_out_queues = count; return 0; } @@ -2634,12 +2633,12 @@ static int qeth_init_input_buffer(struct qeth_card *card, buf->rx_skb = netdev_alloc_skb(card->dev, QETH_RX_PULL_LEN + ETH_HLEN); if (!buf->rx_skb) - return 1; + return -ENOMEM; } pool_entry = qeth_find_free_buffer_pool_entry(card); if (!pool_entry) - return 1; + return -ENOBUFS; /* * since the buffer is accessed only from the input_tasklet @@ -2683,10 +2682,15 @@ int qeth_init_qdio_queues(struct qeth_card *card) /* inbound queue */ qdio_reset_buffers(card->qdio.in_q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); memset(&card->rx, 0, sizeof(struct qeth_rx)); + qeth_initialize_working_pool_list(card); /*give only as many buffers to hardware as we have buffer pool entries*/ - for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; ++i) - qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]); + for (i = 0; i < card->qdio.in_buf_pool.buf_count - 1; i++) { + rc = qeth_init_input_buffer(card, &card->qdio.in_q->bufs[i]); + if (rc) + return rc; + } + card->qdio.in_q->next_buf_to_init = card->qdio.in_buf_pool.buf_count - 1; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, @@ -4749,10 +4753,10 @@ static void qeth_qdio_establish_cq(struct qeth_card *card, if (card->options.cq == QETH_CQ_ENABLED) { int offset = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1); - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { - in_sbal_ptrs[offset + i] = (struct qdio_buffer *) - virt_to_phys(card->qdio.c_q->bufs[i].buffer); - } + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) + in_sbal_ptrs[offset + i] = + card->qdio.c_q->bufs[i].buffer; queue_start_poll[card->qdio.no_in_queues - 1] = NULL; } @@ -4786,10 +4790,9 @@ static int qeth_qdio_establish(struct qeth_card *card) rc = -ENOMEM; goto out_free_qib_param; } - for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) { - in_sbal_ptrs[i] = (struct qdio_buffer *) - virt_to_phys(card->qdio.in_q->bufs[i].buffer); - } + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) + in_sbal_ptrs[i] = card->qdio.in_q->bufs[i].buffer; queue_start_poll = kcalloc(card->qdio.no_in_queues, sizeof(void *), GFP_KERNEL); @@ -4810,11 +4813,11 @@ static int qeth_qdio_establish(struct qeth_card *card) rc = -ENOMEM; goto out_free_queue_start_poll; } + for (i = 0, k = 0; i < card->qdio.no_out_queues; ++i) - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j, ++k) { - out_sbal_ptrs[k] = (struct qdio_buffer *)virt_to_phys( - card->qdio.out_qs[i]->bufs[j]->buffer); - } + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++, k++) + out_sbal_ptrs[k] = + card->qdio.out_qs[i]->bufs[j]->buffer; memset(&init_data, 0, sizeof(struct qdio_initialize)); init_data.cdev = CARD_DDEV(card); @@ -5142,7 +5145,7 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card, } use_rx_sg = (card->options.cq == QETH_CQ_ENABLED) || - ((skb_len >= card->options.rx_sg_cb) && + (skb_len > card->options.rx_sg_cb && !atomic_read(&card->force_alloc_skb) && !IS_OSN(card)); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 47d37e75dda6..e26ad80ddfa3 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1815,15 +1815,14 @@ int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state) QETH_CARD_TEXT(card, 2, "vniccsch"); - /* do not change anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic and enable/disable are supported */ if (!(card->options.vnicc.sup_chars & vnicc) || !(card->options.vnicc.set_char_sup & 
vnicc)) return -EOPNOTSUPP; + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* set enable/disable command and store wanted characteristic */ if (state) { cmd = IPA_VNICC_ENABLE; @@ -1869,14 +1868,13 @@ int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state) QETH_CARD_TEXT(card, 2, "vniccgch"); - /* do not get anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic is supported */ if (!(card->options.vnicc.sup_chars & vnicc)) return -EOPNOTSUPP; + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* if card is ready, query current VNICC state */ if (qeth_card_hw_is_reachable(card)) rc = qeth_l2_vnicc_query_chars(card); @@ -1894,15 +1892,14 @@ int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout) QETH_CARD_TEXT(card, 2, "vniccsto"); - /* do not change anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic and set_timeout are supported */ if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) return -EOPNOTSUPP; + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* do we need to do anything? */ if (card->options.vnicc.learning_timeout == timeout) return rc; @@ -1931,14 +1928,14 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout) QETH_CARD_TEXT(card, 2, "vniccgto"); - /* do not get anything if BridgePort is enabled */ - if (qeth_bridgeport_is_in_use(card)) - return -EBUSY; - /* check if characteristic and get_timeout are supported */ if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) || !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING)) return -EOPNOTSUPP; + + if (qeth_bridgeport_is_in_use(card)) + return -EBUSY; + /* if card is ready, get timeout. Otherwise, just return stored value */ *timeout = card->options.vnicc.learning_timeout; if (qeth_card_hw_is_reachable(card)) diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index 2b1e4da1944f..4bfb79f20588 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -410,7 +410,7 @@ struct fsf_qtcb_bottom_port { u8 cb_util; u8 a_util; u8 res2; - u16 temperature; + s16 temperature; u16 vcc; u16 tx_bias; u16 tx_power; diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 494b9fe9cc94..a711a0d15100 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -800,7 +800,7 @@ static ZFCP_DEV_ATTR(adapter_diag, b2b_credit, 0400, static ZFCP_DEV_ATTR(adapter_diag_sfp, _name, 0400, \ zfcp_sysfs_adapter_diag_sfp_##_name##_show, NULL) -ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 5, "%hu"); +ZFCP_DEFINE_DIAG_SFP_ATTR(temperature, temperature, 6, "%hd"); ZFCP_DEFINE_DIAG_SFP_ATTR(vcc, vcc, 5, "%hu"); ZFCP_DEFINE_DIAG_SFP_ATTR(tx_bias, tx_bias, 5, "%hu"); ZFCP_DEFINE_DIAG_SFP_ATTR(tx_power, tx_power, 5, "%hu"); diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index a9d40d3b90ef..4190a025381a 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -2314,7 +2314,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period, * At some speeds, we only support * ST transfers. 
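 * (ST = single-transition clocking, as opposed to dual-transition DT; when * the selected rate supports only ST, the check below strips * MSG_EXT_PPR_DT_REQ from the PPR options.)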
*/ - if ((syncrate->sxfr_u2 & ST_SXFR) != 0) + if ((syncrate->sxfr_u2 & ST_SXFR) != 0) *ppr_options &= ~MSG_EXT_PPR_DT_REQ; break; } diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 469d0bc9f5fe..00cf33573136 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1383,7 +1383,7 @@ csio_device_reset(struct device *dev, return -EINVAL; /* Delete NPIV lnodes */ - csio_lnodes_exit(hw, 1); + csio_lnodes_exit(hw, 1); /* Block upper IOs */ csio_lnodes_block_request(hw); diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 0bc63a7ab41c..b5dd1caae5e9 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -887,6 +887,10 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + struct iscsi_session *session = cls_session->dd_data; + + if (WARN_ON_ONCE(session->leadconn)) + return; iscsi_tcp_r2tpool_free(cls_session->dd_data); iscsi_session_teardown(cls_session); diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 9c5f7c9178c6..2b865c6423e2 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -628,6 +628,8 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 99c9bb249758..1b4dbb28fb41 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -1493,33 +1493,35 @@ int lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, size_t size) { - char fwrev[FW_REV_STR_SIZE]; - int n; + char fwrev[FW_REV_STR_SIZE] = {0}; + char tmp[MAXHOSTNAMELEN] = {0}; - lpfc_decode_firmware_rev(vport->phba, fwrev, 0); + memset(symbol, 0, size); - n = scnprintf(symbol, size, "Emulex %s", vport->phba->ModelName); - if (size < n) - return n; + scnprintf(tmp, sizeof(tmp), "Emulex %s", vport->phba->ModelName); + if (strlcat(symbol, tmp, size) >= size) + goto buffer_done; - n += scnprintf(symbol + n, size - n, " FV%s", fwrev); - if (size < n) - return n; + lpfc_decode_firmware_rev(vport->phba, fwrev, 0); + scnprintf(tmp, sizeof(tmp), " FV%s", fwrev); + if (strlcat(symbol, tmp, size) >= size) + goto buffer_done; - n += scnprintf(symbol + n, size - n, " DV%s.", - lpfc_release_version); - if (size < n) - return n; + scnprintf(tmp, sizeof(tmp), " DV%s", lpfc_release_version); + if (strlcat(symbol, tmp, size) >= size) + goto buffer_done; - n += scnprintf(symbol + n, size - n, " HN:%s.", - init_utsname()->nodename); - if (size < n) - return n; + scnprintf(tmp, sizeof(tmp), " HN:%s", init_utsname()->nodename); + if (strlcat(symbol, tmp, size) >= size) + goto buffer_done; /* Note :- OS name is "Linux" */ - n += scnprintf(symbol + n, size - n, " OS:%s", - init_utsname()->sysname); - return n; + scnprintf(tmp, sizeof(tmp), " OS:%s", init_utsname()->sysname); + strlcat(symbol, tmp, size); + +buffer_done: + return strnlen(symbol, size); + } static uint32_t diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index a4bc81479284..15b3d9050804 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -4392,7 +4392,8 @@ dcmd_timeout_ocr_possible(struct megasas_instance *instance) { if 
(instance->adapter_type == MFI_SERIES) return KILL_ADAPTER; else if (instance->unload || - test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) + test_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, + &instance->reset_flags)) return IGNORE_TIMEOUT; else return INITIATE_OCR; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index e301458bcbae..d86838801805 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -594,7 +594,8 @@ megasas_alloc_request_fusion(struct megasas_instance *instance) fusion->io_request_frames = dma_pool_alloc(fusion->io_request_frames_pool, - GFP_KERNEL, &fusion->io_request_frames_phys); + GFP_KERNEL | __GFP_NOWARN, + &fusion->io_request_frames_phys); if (!fusion->io_request_frames) { if (instance->max_fw_cmds >= (MEGASAS_REDUCE_QD_COUNT * 2)) { instance->max_fw_cmds -= MEGASAS_REDUCE_QD_COUNT; @@ -632,7 +633,7 @@ megasas_alloc_request_fusion(struct megasas_instance *instance) fusion->io_request_frames = dma_pool_alloc(fusion->io_request_frames_pool, - GFP_KERNEL, + GFP_KERNEL | __GFP_NOWARN, &fusion->io_request_frames_phys); if (!fusion->io_request_frames) { @@ -4847,6 +4848,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING); instance->instancet->disable_intr(instance); megasas_sync_irqs((unsigned long)instance); @@ -5046,7 +5048,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) instance->skip_heartbeat_timer_del = 1; retval = FAILED; out: - clear_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + clear_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); mutex_unlock(&instance->reset_mutex); return retval; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index c013c80fe4e6..dd2e37e40d6b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -89,6 +89,7 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { #define MEGASAS_FP_CMD_LEN 16 #define MEGASAS_FUSION_IN_RESET 0 +#define MEGASAS_FUSION_OCR_NOT_POSSIBLE 1 #define RAID_1_PEER_CMDS 2 #define JBOD_MAPS_COUNT 2 #define MEGASAS_REDUCE_QD_COUNT 64 diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 30afc59c1870..7bbff91f8883 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2519,12 +2519,6 @@ qla83xx_fw_dump(scsi_qla_host_t *vha, int hardware_locked) /* Driver Debug Functions. */ /****************************************************************************/ -static inline int -ql_mask_match(uint level) -{ - return (level & ql2xextended_error_logging) == level; -} - /* * This function is for formatting and logging debug information. * It is to be used when vha is available. 
It formats the message diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index bb01b680ce9f..433e95502808 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *, extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, struct qla_hw_data *); extern int qla24xx_soft_reset(struct qla_hw_data *); + +static inline int +ql_mask_match(uint level) +{ + return (level & ql2xextended_error_logging) == level; +} diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 2edd9f7b3074..c18e2864de3a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2402,6 +2402,7 @@ typedef struct fc_port { unsigned int scan_needed:1; unsigned int n2n_flag:1; unsigned int explicit_logout:1; + unsigned int prli_pend_timer:1; struct completion nvme_del_done; uint32_t nvme_prli_service_param; @@ -2428,6 +2429,7 @@ typedef struct fc_port { struct work_struct free_work; struct work_struct reg_work; uint64_t jiffies_at_registration; + unsigned long prli_expired; struct qlt_plogi_ack_t *plogi_link[QLT_PLOGI_LINK_MAX]; uint16_t tgt_id; @@ -4845,6 +4847,9 @@ struct sff_8247_a0 { (ha->fc4_type_priority == FC4_PRIORITY_NVME)) || \ NVME_ONLY_TARGET(fcport)) \ +#define PRLI_PHASE(_cls) \ + ((_cls == DSC_LS_PRLI_PEND) || (_cls == DSC_LS_PRLI_COMP)) + #include "qla_target.h" #include "qla_gbl.h" #include "qla_dbg.h" diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index aa5204163bec..1841d4ad2cb2 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -685,7 +685,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, port_id_t id; u64 wwn; u16 data[2]; - u8 current_login_state; + u8 current_login_state, nvme_cls; fcport = ea->fcport; ql_dbg(ql_dbg_disc, vha, 0xffff, @@ -744,10 +744,17 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha, loop_id = le16_to_cpu(e->nport_handle); loop_id = (loop_id & 0x7fff); - if (NVME_TARGET(vha->hw, fcport)) - current_login_state = e->current_login_state >> 4; - else - current_login_state = e->current_login_state & 0xf; + nvme_cls = e->current_login_state >> 4; + current_login_state = e->current_login_state & 0xf; + + if (PRLI_PHASE(nvme_cls)) { + current_login_state = nvme_cls; + fcport->fc4_type &= ~FS_FC4TYPE_FCP; + fcport->fc4_type |= FS_FC4TYPE_NVME; + } else if (PRLI_PHASE(current_login_state)) { + fcport->fc4_type |= FS_FC4TYPE_FCP; + fcport->fc4_type &= ~FS_FC4TYPE_NVME; + } ql_dbg(ql_dbg_disc, vha, 0x20e2, "%s found %8phC CLS [%x|%x] fc4_type %d ID[%06x|%06x] lid[%d|%d]\n", @@ -1216,12 +1223,19 @@ qla24xx_async_prli(struct scsi_qla_host *vha, fc_port_t *fcport) struct srb_iocb *lio; int rval = QLA_FUNCTION_FAILED; - if (!vha->flags.online) + if (!vha->flags.online) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n", + __func__, __LINE__, fcport->port_name); return rval; + } - if (fcport->fw_login_state == DSC_LS_PLOGI_PEND || - fcport->fw_login_state == DSC_LS_PRLI_PEND) + if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND || + fcport->fw_login_state == DSC_LS_PRLI_PEND) && + qla_dual_mode_enabled(vha)) { + ql_dbg(ql_dbg_disc, vha, 0xffff, "%s %d %8phC exit\n", + __func__, __LINE__, fcport->port_name); return rval; + } sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) @@ -1600,6 +1614,10 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) break; default: if 
(fcport->login_pause) { + ql_dbg(ql_dbg_disc, vha, 0x20d8, + "%s %d %8phC exit\n", + __func__, __LINE__, + fcport->port_name); fcport->last_rscn_gen = fcport->rscn_gen; fcport->last_login_gen = fcport->login_gen; set_bit(RELOGIN_NEEDED, &vha->dpc_flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 7b8a6bfcf08d..9f9124550415 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1918,6 +1918,18 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, inbuf = (uint32_t *)&sts->nvme_ersp_data; outbuf = (uint32_t *)fd->rspaddr; iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); + if (unlikely(iocb->u.nvme.rsp_pyld_len > + sizeof(struct nvme_fc_ersp_iu))) { + if (ql_mask_match(ql_dbg_io)) { + WARN_ONCE(1, "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + ql_log(ql_log_warn, fcport->vha, 0x5100, + "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + } + iocb->u.nvme.rsp_pyld_len = + sizeof(struct nvme_fc_ersp_iu); + } iter = iocb->u.nvme.rsp_pyld_len >> 2; for (; iter; iter--) *outbuf++ = swab32(*inbuf++); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index b7c1108c48e2..9e09964f5c0e 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -6152,9 +6152,8 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr, mcp->mb[7] = LSW(MSD(req_dma)); mcp->mb[8] = MSW(addr); /* Setting RAM ID to valid */ - mcp->mb[10] |= BIT_7; /* For MCTP RAM ID is 0x40 */ - mcp->mb[10] |= 0x40; + mcp->mb[10] = BIT_7 | 0x40; mcp->out_mb |= MBX_10|MBX_8|MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1| MBX_0; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 2b2028f2383e..c855d013ba8a 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1612,8 +1612,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha) return (u8 *)&ha->hablob->fw->data[offset]; } -static __le32 -qla82xx_get_fw_size(struct qla_hw_data *ha) +static u32 qla82xx_get_fw_size(struct qla_hw_data *ha) { struct qla82xx_uri_data_desc *uri_desc = NULL; @@ -1624,7 +1623,7 @@ qla82xx_get_fw_size(struct qla_hw_data *ha) return cpu_to_le32(uri_desc->size); } - return cpu_to_le32(*(u32 *)&ha->hablob->fw->data[FW_SIZE_OFFSET]); + return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]); } static u8 * @@ -1816,7 +1815,7 @@ qla82xx_fw_load_from_blob(struct qla_hw_data *ha) } flashaddr = FLASH_ADDR_START; - size = (__force u32)qla82xx_get_fw_size(ha) / 8; + size = qla82xx_get_fw_size(ha) / 8; ptr64 = (u64 *)qla82xx_get_fw_offs(ha); for (i = 0; i < size; i++) { diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 68c14143e50e..f67a5c4dacd0 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1257,6 +1257,7 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; spin_unlock_irqrestore(&sess->vha->work_lock, flags); + sess->prli_pend_timer = 0; sess->disc_state = DSC_DELETE_PEND; qla24xx_chk_fcp_state(sess); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 2323432a0edb..5504ab11decc 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4145,7 +4145,7 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha) dma_free_coherent(&ha->pdev->dev, ha->queues_len, ha->queues, ha->queues_dma); - if 
(ha->fw_dump) + if (ha->fw_dump) vfree(ha->fw_dump); ha->queues_len = 0; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index ed8d9709b9b9..271afea654e2 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2947,6 +2947,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_session_has_conns(int sid) +{ + struct iscsi_cls_conn *conn; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&connlock, flags); + list_for_each_entry(conn, &connlist, conn_list) { + if (iscsi_conn_get_sid(conn) == sid) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&connlock, flags); + + return found; +} + static int iscsi_set_iface_params(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) @@ -3524,10 +3542,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); - if (session) - transport->destroy_session(session); - else + if (!session) err = -EINVAL; + else if (iscsi_session_has_conns(ev->u.d_session.sid)) + err = -EBUSY; + else + transport->destroy_session(session); break; case ISCSI_UEVENT_UNBIND_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index e0bd4cf17230..5b75a65103bd 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -161,6 +161,7 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data) { struct scsi_disk *sdkp = scsi_disk(disk); + sector_t capacity = logical_to_sectors(sdkp->device, sdkp->capacity); unsigned int nr, i; unsigned char *buf; size_t offset, buflen = 0; @@ -171,11 +172,15 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, /* Not a zoned device */ return -EOPNOTSUPP; + if (!capacity) + /* Device gone or invalid */ + return -ENODEV; + buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen); if (!buf) return -ENOMEM; - while (zone_idx < nr_zones && sector < get_capacity(disk)) { + while (zone_idx < nr_zones && sector < capacity) { ret = sd_zbc_do_report_zones(sdkp, buf, buflen, sectors_to_logical(sdkp->device, sector), true); if (ret) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 160748ad9c0f..eace8886d95a 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -405,6 +405,38 @@ sg_release(struct inode *inode, struct file *filp) return 0; } +static int get_sg_io_pack_id(int *pack_id, void __user *buf, size_t count) +{ + struct sg_header __user *old_hdr = buf; + int reply_len; + + if (count >= SZ_SG_HEADER) { + /* negative reply_len means v3 format, otherwise v1/v2 */ + if (get_user(reply_len, &old_hdr->reply_len)) + return -EFAULT; + + if (reply_len >= 0) + return get_user(*pack_id, &old_hdr->pack_id); + + if (in_compat_syscall() && + count >= sizeof(struct compat_sg_io_hdr)) { + struct compat_sg_io_hdr __user *hp = buf; + + return get_user(*pack_id, &hp->pack_id); + } + + if (count >= sizeof(struct sg_io_hdr)) { + struct sg_io_hdr __user *hp = buf; + + return get_user(*pack_id, &hp->pack_id); + } + } + + /* no valid header was passed, so ignore the pack_id */ + *pack_id = -1; + return 0; +} + static ssize_t sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) { @@ -413,8 +445,8 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) Sg_request *srp; int req_pack_id = -1; 
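/* a req_pack_id of -1 matches any request; when the fd is in force_packid * mode, get_sg_io_pack_id() below fills in the id the caller asked for */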
sg_io_hdr_t *hp; - struct sg_header *old_hdr = NULL; - int retval = 0; + struct sg_header *old_hdr; + int retval = 0; /* * This could cause a response to be stranded. Close the associated * file descriptor to free up any resources being held. */ @@ -429,79 +461,34 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_read: count=%d\n", (int) count)); - if (sfp->force_packid && (count >= SZ_SG_HEADER)) { - old_hdr = memdup_user(buf, SZ_SG_HEADER); - if (IS_ERR(old_hdr)) - return PTR_ERR(old_hdr); - if (old_hdr->reply_len < 0) { - if (count >= SZ_SG_IO_HDR) { - /* - * This is stupid. - * - * We're copying the whole sg_io_hdr_t from user - * space just to get the 'pack_id' field. But the - * field is at different offsets for the compat - * case, so we'll use "get_sg_io_hdr()" to copy - * the whole thing and convert it. - * - * We could do something like just calculating the - * offset based of 'in_compat_syscall()', but the - * 'compat_sg_io_hdr' definition is in the wrong - * place for that. - */ - sg_io_hdr_t *new_hdr; - new_hdr = kmalloc(SZ_SG_IO_HDR, GFP_KERNEL); - if (!new_hdr) { - retval = -ENOMEM; - goto free_old_hdr; - } - retval = get_sg_io_hdr(new_hdr, buf); - req_pack_id = new_hdr->pack_id; - kfree(new_hdr); - if (retval) { - retval = -EFAULT; - goto free_old_hdr; - } - } - } else - req_pack_id = old_hdr->pack_id; - } + if (sfp->force_packid) + retval = get_sg_io_pack_id(&req_pack_id, buf, count); + if (retval) + return retval; + srp = sg_get_rq_mark(sfp, req_pack_id); if (!srp) { /* now wait on packet to arrive */ - if (atomic_read(&sdp->detaching)) { - retval = -ENODEV; - goto free_old_hdr; - } - if (filp->f_flags & O_NONBLOCK) { - retval = -EAGAIN; - goto free_old_hdr; - } + if (atomic_read(&sdp->detaching)) + return -ENODEV; + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; retval = wait_event_interruptible(sfp->read_wait, (atomic_read(&sdp->detaching) || (srp = sg_get_rq_mark(sfp, req_pack_id)))); - if (atomic_read(&sdp->detaching)) { - retval = -ENODEV; - goto free_old_hdr; - } - if (retval) { + if (atomic_read(&sdp->detaching)) + return -ENODEV; + if (retval) /* -ERESTARTSYS as signal hit process */ - goto free_old_hdr; - } - } - if (srp->header.interface_id != '\0') { - retval = sg_new_read(sfp, buf, count, srp); - goto free_old_hdr; + return retval; } + if (srp->header.interface_id != '\0') + return sg_new_read(sfp, buf, count, srp); hp = &srp->header; - if (old_hdr == NULL) { - old_hdr = kmalloc(SZ_SG_HEADER, GFP_KERNEL); - if (!
old_hdr) { - retval = -ENOMEM; - goto free_old_hdr; - } - } - memset(old_hdr, 0, SZ_SG_HEADER); + old_hdr = kzalloc(SZ_SG_HEADER, GFP_KERNEL); + if (!old_hdr) + return -ENOMEM; + old_hdr->reply_len = (int) hp->timeout; old_hdr->pack_len = old_hdr->reply_len; /* old, strange behaviour */ old_hdr->pack_id = hp->pack_id; @@ -575,7 +562,12 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) int err = 0, err2; int len; - if (count < SZ_SG_IO_HDR) { + if (in_compat_syscall()) { + if (count < sizeof(struct compat_sg_io_hdr)) { + err = -EINVAL; + goto err_out; + } + } else if (count < SZ_SG_IO_HDR) { err = -EINVAL; goto err_out; } diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 83e28edc3ac5..8a21f49caf0d 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -13,6 +13,7 @@ #include "ufshcd.h" #include "ufshcd-pltfrm.h" +#include "ufs_quirks.h" #include "unipro.h" #include "ufs-mediatek.h" @@ -289,6 +290,15 @@ static int ufs_mtk_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) return 0; } +static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba, + struct ufs_dev_desc *card) +{ + if (card->wmanufacturerid == UFS_VENDOR_SAMSUNG) + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TACTIVATE), 6); + + return 0; +} + /** * struct ufs_hba_mtk_vops - UFS MTK specific variant operations * @@ -301,6 +311,7 @@ static struct ufs_hba_variant_ops ufs_hba_mtk_vops = { .setup_clocks = ufs_mtk_setup_clocks, .link_startup_notify = ufs_mtk_link_startup_notify, .pwr_change_notify = ufs_mtk_pwr_change_notify, + .apply_dev_quirks = ufs_mtk_apply_dev_quirks, .suspend = ufs_mtk_suspend, .resume = ufs_mtk_resume, }; diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index c69c29a1ceb9..ebb5c66e069f 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -949,7 +949,8 @@ static int ufs_qcom_quirk_host_pa_saveconfigtime(struct ufs_hba *hba) return err; } -static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba) +static int ufs_qcom_apply_dev_quirks(struct ufs_hba *hba, + struct ufs_dev_desc *card) { int err = 0; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index b5966faf3e98..5340a980d24b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4799,7 +4799,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if (host_byte(result) != DID_OK) + if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -5053,6 +5053,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled"); + hba->is_urgent_bkops_lvl_checked = false; out: return err; } @@ -5077,6 +5078,7 @@ static void ufshcd_force_reset_auto_bkops(struct ufs_hba *hba) hba->ee_ctrl_mask &= ~MASK_EE_URGENT_BKOPS; ufshcd_disable_auto_bkops(hba); } + hba->is_urgent_bkops_lvl_checked = false; } static inline int ufshcd_get_bkops_status(struct ufs_hba *hba, u32 *status) @@ -5123,6 +5125,7 @@ static int ufshcd_bkops_ctrl(struct ufs_hba *hba, err = ufshcd_enable_auto_bkops(hba); else err = ufshcd_disable_auto_bkops(hba); + hba->urgent_bkops_lvl = curr_status; out: return err; } @@ -5348,8 +5351,8 @@ static void ufshcd_err_handler(struct work_struct *work) /* * if host reset is required then skip clearing the pending - * transfers forcefully because they will automatically get - * cleared after link 
startup. + * transfers forcefully because they will get cleared during + * host reset and restore */ if (needs_reset) goto skip_pending_xfer_clear; @@ -6279,9 +6282,15 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) int err; unsigned long flags; - /* Reset the host controller */ + /* + * Stop the host controller and complete the requests + * cleared by h/w + */ spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_hba_stop(hba, false); + hba->silence_err_logs = true; + ufshcd_complete_requests(hba); + hba->silence_err_logs = false; spin_unlock_irqrestore(hba->host->host_lock, flags); /* scale up clocks to max frequency before full reinitialization */ @@ -6315,7 +6324,6 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) static int ufshcd_reset_and_restore(struct ufs_hba *hba) { int err = 0; - unsigned long flags; int retries = MAX_HOST_RESET_RETRIES; do { @@ -6325,15 +6333,6 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba) err = ufshcd_host_reset_and_restore(hba); } while (err && --retries); - /* - * After reset the door-bell might be cleared, complete - * outstanding requests in s/w here. - */ - spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_transfer_req_compl(hba); - ufshcd_tmc_handler(hba); - spin_unlock_irqrestore(hba->host->host_lock, flags); - return err; } @@ -6799,7 +6798,8 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) return ret; } -static void ufshcd_tune_unipro_params(struct ufs_hba *hba) +static void ufshcd_tune_unipro_params(struct ufs_hba *hba, + struct ufs_dev_desc *card) { if (ufshcd_is_unipro_pa_params_tuning_req(hba)) { ufshcd_tune_pa_tactivate(hba); @@ -6813,7 +6813,7 @@ static void ufshcd_tune_unipro_params(struct ufs_hba *hba) if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE) ufshcd_quirk_tune_host_pa_tactivate(hba); - ufshcd_vops_apply_dev_quirks(hba); + ufshcd_vops_apply_dev_quirks(hba, card); } static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) @@ -6976,10 +6976,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) } ufs_fixup_device_setup(hba, &card); + ufshcd_tune_unipro_params(hba, &card); ufs_put_device_desc(&card); - ufshcd_tune_unipro_params(hba); - /* UFS device is also active now */ ufshcd_set_ufs_dev_active(hba); ufshcd_force_reset_auto_bkops(hba); @@ -7027,7 +7026,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) ufshcd_init_icc_levels(hba); /* Add required well known logical units to scsi mid layer */ - if (ufshcd_scsi_add_wlus(hba)) + ret = ufshcd_scsi_add_wlus(hba); + if (ret) goto out; /* Initialize devfreq after UFS device is detected */ diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 2740f6941ec6..46bec0e18c72 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -322,7 +322,7 @@ struct ufs_hba_variant_ops { void (*setup_task_mgmt)(struct ufs_hba *, int, u8); void (*hibern8_notify)(struct ufs_hba *, enum uic_cmd_dme, enum ufs_notify_change_status); - int (*apply_dev_quirks)(struct ufs_hba *); + int (*apply_dev_quirks)(struct ufs_hba *, struct ufs_dev_desc *); int (*suspend)(struct ufs_hba *, enum ufs_pm_op); int (*resume)(struct ufs_hba *, enum ufs_pm_op); void (*dbg_register_dump)(struct ufs_hba *hba); @@ -513,6 +513,7 @@ struct ufs_stats { * @uic_error: UFS interconnect layer error status * @saved_err: sticky error mask * @saved_uic_err: sticky UIC error mask + * @silence_err_logs: flag to silence error logs * @dev_cmd: ufs device management command information * @last_dme_cmd_tstamp: time stamp of the last 
completed DME command * @auto_bkops_enabled: to track whether bkops is enabled in device @@ -670,6 +671,7 @@ struct ufs_hba { u32 saved_err; u32 saved_uic_err; struct ufs_stats ufs_stats; + bool silence_err_logs; /* Device management request data */ struct ufs_dev_cmd dev_cmd; @@ -1055,10 +1057,11 @@ static inline void ufshcd_vops_hibern8_notify(struct ufs_hba *hba, return hba->vops->hibern8_notify(hba, cmd, status); } -static inline int ufshcd_vops_apply_dev_quirks(struct ufs_hba *hba) +static inline int ufshcd_vops_apply_dev_quirks(struct ufs_hba *hba, + struct ufs_dev_desc *card) { if (hba->vops && hba->vops->apply_dev_quirks) - return hba->vops->apply_dev_quirks(hba); + return hba->vops->apply_dev_quirks(hba, card); return 0; } diff --git a/drivers/soc/imx/soc-imx-scu.c b/drivers/soc/imx/soc-imx-scu.c index fb70b8a3f7c5..20d37eaeb5f2 100644 --- a/drivers/soc/imx/soc-imx-scu.c +++ b/drivers/soc/imx/soc-imx-scu.c @@ -25,7 +25,7 @@ struct imx_sc_msg_misc_get_soc_id { u32 id; } resp; } data; -} __packed; +} __packed __aligned(4); struct imx_sc_msg_misc_get_soc_uid { struct imx_sc_rpc_msg hdr; diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index 5741ec3fa814..51850cc68b70 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -93,6 +93,7 @@ static struct rpmhpd sdm845_mx = { static struct rpmhpd sdm845_mx_ao = { .pd = { .name = "mx_ao", }, + .active_only = true, .peer = &sdm845_mx, .res_name = "mx.lvl", }; @@ -107,6 +108,7 @@ static struct rpmhpd sdm845_cx = { static struct rpmhpd sdm845_cx_ao = { .pd = { .name = "cx_ao", }, + .active_only = true, .peer = &sdm845_cx, .parent = &sdm845_mx_ao.pd, .res_name = "cx.lvl", diff --git a/drivers/soc/tegra/fuse/fuse-tegra30.c b/drivers/soc/tegra/fuse/fuse-tegra30.c index b8daaf5b7291..efd158b4607c 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra30.c +++ b/drivers/soc/tegra/fuse/fuse-tegra30.c @@ -36,7 +36,8 @@ defined(CONFIG_ARCH_TEGRA_124_SOC) || \ defined(CONFIG_ARCH_TEGRA_132_SOC) || \ defined(CONFIG_ARCH_TEGRA_210_SOC) || \ - defined(CONFIG_ARCH_TEGRA_186_SOC) + defined(CONFIG_ARCH_TEGRA_186_SOC) || \ + defined(CONFIG_ARCH_TEGRA_194_SOC) static u32 tegra30_fuse_read_early(struct tegra_fuse *fuse, unsigned int offset) { if (WARN_ON(!fuse->base)) diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c index df76778af601..f8b9c4058926 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -123,7 +123,7 @@ void __init tegra_init_apbmisc(void) apbmisc.flags = IORESOURCE_MEM; /* strapping options */ - if (tegra_get_chip_id() == TEGRA124) { + if (of_machine_is_compatible("nvidia,tegra124")) { straps.start = 0x7000e864; straps.end = 0x7000e867; } else { diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index fd8007ebb145..13def7f78b9e 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -149,6 +149,7 @@ struct atmel_qspi { struct clk *qspick; struct platform_device *pdev; const struct atmel_qspi_caps *caps; + resource_size_t mmap_size; u32 pending; u32 mr; u32 scr; @@ -329,6 +330,14 @@ static int atmel_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) u32 sr, offset; int err; + /* + * Check if the address exceeds the MMIO window size. An improvement + * would be to add support for regular SPI mode and fall back to it + * when the flash memories overrun the controller's memory space. 
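+ * + * Illustrative example: with a 16 MiB window (mmap_size = 0x1000000), a + * 256-byte read ending exactly at the window boundary (addr 0xffff00) is + * still accepted, while the same read at addr 0xffff80 would run 128 bytes + * past the window and is rejected with -ENOTSUPP.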
+ */ + if (op->addr.val + op->data.nbytes > aq->mmap_size) + return -ENOTSUPP; + err = atmel_qspi_set_cfg(aq, op, &offset); if (err) return err; @@ -480,6 +489,8 @@ static int atmel_qspi_probe(struct platform_device *pdev) goto exit; } + aq->mmap_size = resource_size(res); + /* Get the peripheral clock */ aq->pclk = devm_clk_get(&pdev->dev, "pclk"); if (IS_ERR(aq->pclk)) diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c index 7327309ea3d5..6c235306c0e4 100644 --- a/drivers/spi/spi-bcm63xx-hsspi.c +++ b/drivers/spi/spi-bcm63xx-hsspi.c @@ -366,7 +366,6 @@ static int bcm63xx_hsspi_probe(struct platform_device *pdev) goto out_disable_clk; rate = clk_get_rate(pll_clk); - clk_disable_unprepare(pll_clk); if (!rate) { ret = -EINVAL; goto out_disable_pll_clk; diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 2cc0ddb4a988..1375bdfc587b 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -862,6 +862,22 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->dev = &pdev->dev; fsl_lpspi->is_slave = is_slave; + controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32); + controller->transfer_one = fsl_lpspi_transfer_one; + controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware; + controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware; + controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX; + controller->dev.of_node = pdev->dev.of_node; + controller->bus_num = pdev->id; + controller->slave_abort = fsl_lpspi_slave_abort; + + ret = devm_spi_register_controller(&pdev->dev, controller); + if (ret < 0) { + dev_err(&pdev->dev, "spi_register_controller error.\n"); + goto out_controller_put; + } + if (!fsl_lpspi->is_slave) { for (i = 0; i < controller->num_chipselect; i++) { int cs_gpio = of_get_named_gpio(np, "cs-gpios", i); @@ -885,16 +901,6 @@ static int fsl_lpspi_probe(struct platform_device *pdev) controller->prepare_message = fsl_lpspi_prepare_message; } - controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32); - controller->transfer_one = fsl_lpspi_transfer_one; - controller->prepare_transfer_hardware = lpspi_prepare_xfer_hardware; - controller->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware; - controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; - controller->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX; - controller->dev.of_node = pdev->dev.of_node; - controller->bus_num = pdev->id; - controller->slave_abort = fsl_lpspi_slave_abort; - init_completion(&fsl_lpspi->xfer_done); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -952,12 +958,6 @@ static int fsl_lpspi_probe(struct platform_device *pdev) if (ret < 0) dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret); - ret = devm_spi_register_controller(&pdev->dev, controller); - if (ret < 0) { - dev_err(&pdev->dev, "spi_register_controller error.\n"); - goto out_controller_put; - } - return 0; out_controller_put: diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c index 79b1558b74b8..e8a499cd1f13 100644 --- a/drivers/spi/spi-fsl-qspi.c +++ b/drivers/spi/spi-fsl-qspi.c @@ -410,7 +410,7 @@ static bool fsl_qspi_supports_op(struct spi_mem *mem, op->data.nbytes > q->devtype_data->txfifo) return false; - return true; + return spi_mem_default_supports_op(mem, op); } static void fsl_qspi_prepare_lut(struct fsl_qspi *q, diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 
7e2292c11d12..7e781c8a5ee5 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -974,20 +974,12 @@ static int omap2_mcspi_setup_transfer(struct spi_device *spi, * Note that we currently allow DMA only if we get a channel * for both rx and tx. Otherwise we'll do PIO for both rx and tx. */ -static int omap2_mcspi_request_dma(struct spi_device *spi) +static int omap2_mcspi_request_dma(struct omap2_mcspi *mcspi, + struct omap2_mcspi_dma *mcspi_dma) { - struct spi_master *master = spi->master; - struct omap2_mcspi *mcspi; - struct omap2_mcspi_dma *mcspi_dma; int ret = 0; - mcspi = spi_master_get_devdata(master); - mcspi_dma = mcspi->dma_channels + spi->chip_select; - - init_completion(&mcspi_dma->dma_rx_completion); - init_completion(&mcspi_dma->dma_tx_completion); - - mcspi_dma->dma_rx = dma_request_chan(&master->dev, + mcspi_dma->dma_rx = dma_request_chan(mcspi->dev, mcspi_dma->dma_rx_ch_name); if (IS_ERR(mcspi_dma->dma_rx)) { ret = PTR_ERR(mcspi_dma->dma_rx); @@ -995,7 +987,7 @@ static int omap2_mcspi_request_dma(struct spi_device *spi) goto no_dma; } - mcspi_dma->dma_tx = dma_request_chan(&master->dev, + mcspi_dma->dma_tx = dma_request_chan(mcspi->dev, mcspi_dma->dma_tx_ch_name); if (IS_ERR(mcspi_dma->dma_tx)) { ret = PTR_ERR(mcspi_dma->dma_tx); @@ -1004,20 +996,40 @@ static int omap2_mcspi_request_dma(struct spi_device *spi) mcspi_dma->dma_rx = NULL; } + init_completion(&mcspi_dma->dma_rx_completion); + init_completion(&mcspi_dma->dma_tx_completion); + no_dma: return ret; } +static void omap2_mcspi_release_dma(struct spi_master *master) +{ + struct omap2_mcspi *mcspi = spi_master_get_devdata(master); + struct omap2_mcspi_dma *mcspi_dma; + int i; + + for (i = 0; i < master->num_chipselect; i++) { + mcspi_dma = &mcspi->dma_channels[i]; + + if (mcspi_dma->dma_rx) { + dma_release_channel(mcspi_dma->dma_rx); + mcspi_dma->dma_rx = NULL; + } + if (mcspi_dma->dma_tx) { + dma_release_channel(mcspi_dma->dma_tx); + mcspi_dma->dma_tx = NULL; + } + } +} + static int omap2_mcspi_setup(struct spi_device *spi) { int ret; struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master); struct omap2_mcspi_regs *ctx = &mcspi->ctx; - struct omap2_mcspi_dma *mcspi_dma; struct omap2_mcspi_cs *cs = spi->controller_state; - mcspi_dma = &mcspi->dma_channels[spi->chip_select]; - if (!cs) { cs = kzalloc(sizeof *cs, GFP_KERNEL); if (!cs) @@ -1042,13 +1054,6 @@ static int omap2_mcspi_setup(struct spi_device *spi) } } - if (!mcspi_dma->dma_rx || !mcspi_dma->dma_tx) { - ret = omap2_mcspi_request_dma(spi); - if (ret) - dev_warn(&spi->dev, "not using DMA for McSPI (%d)\n", - ret); - } - ret = pm_runtime_get_sync(mcspi->dev); if (ret < 0) { pm_runtime_put_noidle(mcspi->dev); @@ -1065,12 +1070,8 @@ static int omap2_mcspi_setup(struct spi_device *spi) static void omap2_mcspi_cleanup(struct spi_device *spi) { - struct omap2_mcspi *mcspi; - struct omap2_mcspi_dma *mcspi_dma; struct omap2_mcspi_cs *cs; - mcspi = spi_master_get_devdata(spi->master); - if (spi->controller_state) { /* Unlink controller state from context save list */ cs = spi->controller_state; @@ -1079,19 +1080,6 @@ static void omap2_mcspi_cleanup(struct spi_device *spi) kfree(cs); } - if (spi->chip_select < spi->master->num_chipselect) { - mcspi_dma = &mcspi->dma_channels[spi->chip_select]; - - if (mcspi_dma->dma_rx) { - dma_release_channel(mcspi_dma->dma_rx); - mcspi_dma->dma_rx = NULL; - } - if (mcspi_dma->dma_tx) { - dma_release_channel(mcspi_dma->dma_tx); - mcspi_dma->dma_tx = NULL; - } - } - if (gpio_is_valid(spi->cs_gpio)) 
gpio_free(spi->cs_gpio); } @@ -1302,6 +1290,9 @@ static bool omap2_mcspi_can_dma(struct spi_master *master, if (spi_controller_is_slave(master)) return true; + master->dma_rx = mcspi_dma->dma_rx; + master->dma_tx = mcspi_dma->dma_tx; + return (xfer->len >= DMA_MIN_BYTES); } @@ -1464,6 +1455,11 @@ static int omap2_mcspi_probe(struct platform_device *pdev) for (i = 0; i < master->num_chipselect; i++) { sprintf(mcspi->dma_channels[i].dma_rx_ch_name, "rx%d", i); sprintf(mcspi->dma_channels[i].dma_tx_ch_name, "tx%d", i); + + status = omap2_mcspi_request_dma(mcspi, + &mcspi->dma_channels[i]); + if (status == -EPROBE_DEFER) + goto free_master; } status = platform_get_irq(pdev, 0); @@ -1501,6 +1497,7 @@ static int omap2_mcspi_probe(struct platform_device *pdev) pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); free_master: + omap2_mcspi_release_dma(master); spi_master_put(master); return status; } @@ -1510,6 +1507,8 @@ static int omap2_mcspi_remove(struct platform_device *pdev) struct spi_master *master = platform_get_drvdata(pdev); struct omap2_mcspi *mcspi = spi_master_get_devdata(master); + omap2_mcspi_release_dma(master); + pm_runtime_dont_use_autosuspend(mcspi->dev); pm_runtime_put_sync(mcspi->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 9071333ebdd8..cabd1a85d71e 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -70,6 +70,10 @@ MODULE_ALIAS("platform:pxa2xx-spi"); #define LPSS_CAPS_CS_EN_SHIFT 9 #define LPSS_CAPS_CS_EN_MASK (0xf << LPSS_CAPS_CS_EN_SHIFT) +#define LPSS_PRIV_CLOCK_GATE 0x38 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK 0x3 +#define LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON 0x3 + struct lpss_config { /* LPSS offset from drv_data->ioaddr */ unsigned offset; @@ -86,6 +90,8 @@ struct lpss_config { unsigned cs_sel_shift; unsigned cs_sel_mask; unsigned cs_num; + /* Quirks */ + unsigned cs_clk_stays_gated : 1; }; /* Keep these sorted with enum pxa_ssp_type */ @@ -156,6 +162,7 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_hi = 56, .cs_sel_shift = 8, .cs_sel_mask = 3 << 8, + .cs_clk_stays_gated = true, }, }; @@ -383,6 +390,22 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable) else value |= LPSS_CS_CONTROL_CS_HIGH; __lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value); + if (config->cs_clk_stays_gated) { + u32 clkgate; + + /* + * Changing CS alone when dynamic clock gating is on won't + * actually flip CS at that time. This ruins SPI transfers + * that specify delays, or have no data. Toggle the clock mode + * to force on briefly to poke the CS pin to move. 
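+ * The read-modify-write below briefly forces the clock on and then + * restores the previous gating mode; the momentary force-on is all that is + * needed for the CS change to take effect.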
+ */ + clkgate = __lpss_ssp_read_priv(drv_data, LPSS_PRIV_CLOCK_GATE); + value = (clkgate & ~LPSS_PRIV_CLOCK_GATE_CLK_CTL_MASK) | + LPSS_PRIV_CLOCK_GATE_CLK_CTL_FORCE_ON; + + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, value); + __lpss_ssp_write_priv(drv_data, LPSS_PRIV_CLOCK_GATE, clkgate); + } } static void cs_assert(struct spi_device *spi) diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c index dd3434a407ea..a364b99497e2 100644 --- a/drivers/spi/spi-qup.c +++ b/drivers/spi/spi-qup.c @@ -1217,6 +1217,11 @@ static int spi_qup_suspend(struct device *device) struct spi_qup *controller = spi_master_get_devdata(master); int ret; + if (pm_runtime_suspended(device)) { + ret = spi_qup_pm_resume_runtime(device); + if (ret) + return ret; + } ret = spi_master_suspend(master); if (ret) return ret; @@ -1225,10 +1230,8 @@ static int spi_qup_suspend(struct device *device) if (ret) return ret; - if (!pm_runtime_suspended(device)) { - clk_disable_unprepare(controller->cclk); - clk_disable_unprepare(controller->iclk); - } + clk_disable_unprepare(controller->cclk); + clk_disable_unprepare(controller->iclk); return 0; } diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 60c4de4e4485..7412a3042a8d 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -401,9 +401,6 @@ static void zynqmp_qspi_chipselect(struct spi_device *qspi, bool is_high) zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry); - /* Dummy generic FIFO entry */ - zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0); - /* Manually start the generic FIFO command */ zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) | diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 8994545367a2..0e70af2677fe 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2615,7 +2615,7 @@ int spi_register_controller(struct spi_controller *ctlr) if (ctlr->use_gpio_descriptors) { status = spi_get_gpio_descs(ctlr); if (status) - return status; + goto free_bus_id; /* * A controller using GPIO descriptors always * supports SPI_CS_HIGH if need be. @@ -2625,7 +2625,7 @@ int spi_register_controller(struct spi_controller *ctlr) /* Legacy code path for GPIOs from DT */ status = of_spi_get_gpio_numbers(ctlr); if (status) - return status; + goto free_bus_id; } } @@ -2633,17 +2633,14 @@ int spi_register_controller(struct spi_controller *ctlr) * Even if it's just one always-selected device, there must * be at least one chipselect. */ - if (!ctlr->num_chipselect) - return -EINVAL; + if (!ctlr->num_chipselect) { + status = -EINVAL; + goto free_bus_id; + } status = device_add(&ctlr->dev); - if (status < 0) { - /* free bus id */ - mutex_lock(&board_lock); - idr_remove(&spi_master_idr, ctlr->bus_num); - mutex_unlock(&board_lock); - goto done; - } + if (status < 0) + goto free_bus_id; dev_dbg(dev, "registered %s %s\n", spi_controller_is_slave(ctlr) ? 
"slave" : "master", dev_name(&ctlr->dev)); @@ -2659,11 +2656,7 @@ int spi_register_controller(struct spi_controller *ctlr) status = spi_controller_initialize_queue(ctlr); if (status) { device_del(&ctlr->dev); - /* free bus id */ - mutex_lock(&board_lock); - idr_remove(&spi_master_idr, ctlr->bus_num); - mutex_unlock(&board_lock); - goto done; + goto free_bus_id; } } /* add statistics */ @@ -2678,7 +2671,12 @@ int spi_register_controller(struct spi_controller *ctlr) /* Register devices from the device tree and ACPI */ of_register_spi_devices(ctlr); acpi_register_spi_devices(ctlr); -done: + return status; + +free_bus_id: + mutex_lock(&board_lock); + idr_remove(&spi_master_idr, ctlr->bus_num); + mutex_unlock(&board_lock); return status; } EXPORT_SYMBOL_GPL(spi_register_controller); diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 1e217e3e9486..2ab6e782f14c 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -396,6 +396,7 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) else retval = get_user(tmp, (u32 __user *)arg); if (retval == 0) { + struct spi_controller *ctlr = spi->controller; u32 save = spi->mode; if (tmp & ~SPI_MODE_MASK) { @@ -403,6 +404,10 @@ spidev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; } + if (ctlr->use_gpio_descriptors && ctlr->cs_gpiods && + ctlr->cs_gpiods[spi->chip_select]) + tmp |= SPI_CS_HIGH; + tmp |= spi->mode & ~SPI_MODE_MASK; spi->mode = (u16)tmp; retval = spi_setup(spi); diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index 97acc2ba2912..de844b412110 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -731,6 +731,7 @@ static int qpnpint_irq_domain_translate(struct irq_domain *d, return 0; } +static struct lock_class_key qpnpint_irq_lock_class, qpnpint_irq_request_class; static void qpnpint_irq_domain_map(struct spmi_pmic_arb *pmic_arb, struct irq_domain *domain, unsigned int virq, @@ -746,6 +747,9 @@ static void qpnpint_irq_domain_map(struct spmi_pmic_arb *pmic_arb, else handler = handle_level_irq; + + irq_set_lockdep_class(virq, &qpnpint_irq_lock_class, + &qpnpint_irq_request_class); irq_domain_set_info(domain, virq, hwirq, &pmic_arb_irqchip, pmic_arb, handler, NULL, NULL); } diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 74d497d39c5a..c6695354b123 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -351,8 +351,23 @@ static inline vm_flags_t calc_vm_may_flags(unsigned long prot) _calc_vm_trans(prot, PROT_EXEC, VM_MAYEXEC); } +static int ashmem_vmfile_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* do not allow to mmap ashmem backing shmem file directly */ + return -EPERM; +} + +static unsigned long +ashmem_vmfile_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) { + static struct file_operations vmfile_fops; struct ashmem_area *asma = file->private_data; int ret = 0; @@ -393,6 +408,19 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma) } vmfile->f_mode |= FMODE_LSEEK; asma->file = vmfile; + /* + * override mmap operation of the vmfile so that it can't be + * remapped which would lead to creation of a new vma with no + * asma permission checks. 
Have to override get_unmapped_area + * as well to prevent VM_BUG_ON check for f_ops modification. + */ + if (!vmfile_fops.mmap) { + vmfile_fops = *vmfile->f_op; + vmfile_fops.mmap = ashmem_vmfile_mmap; + vmfile_fops.get_unmapped_area = + ashmem_vmfile_get_unmapped_area; + } + vmfile->f_op = &vmfile_fops; } get_file(asma->file); diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c index 9b19ea9d3fa1..9a3f7c034ab4 100644 --- a/drivers/staging/greybus/audio_manager.c +++ b/drivers/staging/greybus/audio_manager.c @@ -92,8 +92,8 @@ void gb_audio_manager_remove_all(void) list_for_each_entry_safe(module, next, &modules_list, list) { list_del(&module->list); - kobject_put(&module->kobj); ida_simple_remove(&module_id, module->id); + kobject_put(&module->kobj); } is_empty = list_empty(&modules_list); diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index ba6f905f26fa..69c6dce9be31 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -19,6 +19,7 @@ #include #define MAX_NUM_DEVICES 10 +#define MAX_SYSFS_PREFIX 0x80 #define MAX_SYSFS_PATH 0x200 #define CSV_MAX_LINE 0x1000 #define SYSFS_MAX_INT 0x20 @@ -67,7 +68,7 @@ struct loopback_results { }; struct loopback_device { - char name[MAX_SYSFS_PATH]; + char name[MAX_STR_LEN]; char sysfs_entry[MAX_SYSFS_PATH]; char debugfs_entry[MAX_SYSFS_PATH]; struct loopback_results results; @@ -93,8 +94,8 @@ struct loopback_test { int stop_all; int poll_count; char test_name[MAX_STR_LEN]; - char sysfs_prefix[MAX_SYSFS_PATH]; - char debugfs_prefix[MAX_SYSFS_PATH]; + char sysfs_prefix[MAX_SYSFS_PREFIX]; + char debugfs_prefix[MAX_SYSFS_PREFIX]; struct timespec poll_timeout; struct loopback_device devices[MAX_NUM_DEVICES]; struct loopback_results aggregate_results; @@ -637,7 +638,7 @@ int find_loopback_devices(struct loopback_test *t) static int open_poll_files(struct loopback_test *t) { struct loopback_device *dev; - char buf[MAX_STR_LEN]; + char buf[MAX_SYSFS_PATH + MAX_STR_LEN]; char dummy; int fds_idx = 0; int i; @@ -655,7 +656,7 @@ static int open_poll_files(struct loopback_test *t) goto err; } read(t->fds[fds_idx].fd, &dummy, 1); - t->fds[fds_idx].events = EPOLLERR|EPOLLPRI; + t->fds[fds_idx].events = POLLERR | POLLPRI; t->fds[fds_idx].revents = 0; fds_idx++; } @@ -748,7 +749,7 @@ static int wait_for_complete(struct loopback_test *t) } for (i = 0; i < t->poll_count; i++) { - if (t->fds[i].revents & EPOLLPRI) { + if (t->fds[i].revents & POLLPRI) { /* Dummy read to clear the event */ read(t->fds[i].fd, &dummy, 1); number_of_events++; @@ -907,10 +908,10 @@ int main(int argc, char *argv[]) t.iteration_max = atoi(optarg); break; case 'S': - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'D': - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'm': t.mask = atol(optarg); @@ -961,10 +962,10 @@ int main(int argc, char *argv[]) } if (!strcmp(t.sysfs_prefix, "")) - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", sysfs_prefix); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", sysfs_prefix); if (!strcmp(t.debugfs_prefix, "")) - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", debugfs_prefix); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", debugfs_prefix); ret = find_loopback_devices(&t); if (ret) diff --git a/drivers/staging/media/hantro/hantro_drv.c 
b/drivers/staging/media/hantro/hantro_drv.c index 26108c96b674..37c7cf6b7d8a 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -553,13 +553,13 @@ static int hantro_attach_func(struct hantro_dev *vpu, goto err_rel_entity1; /* Connect the three entities */ - ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 1, + ret = media_create_pad_link(&func->vdev.entity, 0, &func->proc, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) goto err_rel_entity2; - ret = media_create_pad_link(&func->proc, 0, &func->sink, 0, + ret = media_create_pad_link(&func->proc, 1, &func->sink, 0, MEDIA_LNK_FL_IMMUTABLE | MEDIA_LNK_FL_ENABLED); if (ret) diff --git a/drivers/staging/media/meson/vdec/vdec.c b/drivers/staging/media/meson/vdec/vdec.c index 0a1a04fd5d13..8dd1396909d7 100644 --- a/drivers/staging/media/meson/vdec/vdec.c +++ b/drivers/staging/media/meson/vdec/vdec.c @@ -133,6 +133,8 @@ vdec_queue_recycle(struct amvdec_session *sess, struct vb2_buffer *vb) struct amvdec_buffer *new_buf; new_buf = kmalloc(sizeof(*new_buf), GFP_KERNEL); + if (!new_buf) + return; new_buf->vb = vb; mutex_lock(&sess->bufs_recycle_lock); diff --git a/drivers/staging/most/net/net.c b/drivers/staging/most/net/net.c index 6cab1bb8956e..faffe6b63be4 100644 --- a/drivers/staging/most/net/net.c +++ b/drivers/staging/most/net/net.c @@ -81,6 +81,11 @@ static int skb_to_mamac(const struct sk_buff *skb, struct mbo *mbo) unsigned int payload_len = skb->len - ETH_HLEN; unsigned int mdp_len = payload_len + MDP_HDR_LEN; + if (mdp_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mdp_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mdp_len); @@ -128,6 +133,11 @@ static int skb_to_mep(const struct sk_buff *skb, struct mbo *mbo) u8 *buff = mbo->virt_address; unsigned int mep_len = skb->len + MEP_HDR_LEN; + if (mep_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mep_len) { pr_err("drop: too small buffer! 
(%d for %d)\n", mbo->buffer_length, mep_len); diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 710c33fd4965..df945a059cf6 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -222,18 +222,21 @@ static char *translate_scan(struct adapter *padapter, /* parsing WPA/WPA2 IE */ { - u8 buf[MAX_WPA_IE_LEN]; + u8 *buf; u8 wpa_ie[255], rsn_ie[255]; u16 wpa_len = 0, rsn_len = 0; u8 *p; + buf = kzalloc(MAX_WPA_IE_LEN, GFP_ATOMIC); + if (!buf) + return start; + rtw_get_sec_ie(pnetwork->network.ies, pnetwork->network.ie_length, rsn_ie, &rsn_len, wpa_ie, &wpa_len); RT_TRACE(_module_rtl871x_mlme_c_, _drv_info_, ("rtw_wx_get_scan: ssid =%s\n", pnetwork->network.ssid.ssid)); RT_TRACE(_module_rtl871x_mlme_c_, _drv_info_, ("rtw_wx_get_scan: wpa_len =%d rsn_len =%d\n", wpa_len, rsn_len)); if (wpa_len > 0) { p = buf; - memset(buf, 0, MAX_WPA_IE_LEN); p += sprintf(p, "wpa_ie="); for (i = 0; i < wpa_len; i++) p += sprintf(p, "%02x", wpa_ie[i]); @@ -250,7 +253,6 @@ static char *translate_scan(struct adapter *padapter, } if (rsn_len > 0) { p = buf; - memset(buf, 0, MAX_WPA_IE_LEN); p += sprintf(p, "rsn_ie="); for (i = 0; i < rsn_len; i++) p += sprintf(p, "%02x", rsn_ie[i]); @@ -264,6 +266,7 @@ static char *translate_scan(struct adapter *padapter, iwe.u.data.length = rsn_len; start = iwe_stream_add_point(info, start, stop, &iwe, rsn_ie); } + kfree(buf); } {/* parsing WPS IE */ @@ -2008,7 +2011,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } @@ -2795,7 +2798,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) goto out; } - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index b5d42f411dd8..845c8817281c 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -38,6 +38,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ + {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index b44e902ed338..b6d56cfb0a19 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -476,14 +476,13 @@ int rtl8723bs_xmit_thread(void *context) s32 ret; struct adapter *padapter; struct xmit_priv *pxmitpriv; - u8 thread_name[20] = "RTWHALXT"; - + u8 thread_name[20]; ret = _SUCCESS; padapter = context; pxmitpriv = &padapter->xmitpriv; - rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter)); + rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter)); thread_enter(thread_name); DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter)); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c 
b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index db6528a01229..2ac0d84f090e 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3373,7 +3373,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } @@ -4207,7 +4207,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(*param)) { ret = -EINVAL; goto out; } diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c index 488f2539aa9a..81ecfd1a200d 100644 --- a/drivers/staging/speakup/main.c +++ b/drivers/staging/speakup/main.c @@ -561,7 +561,7 @@ static u_long get_word(struct vc_data *vc) return 0; } else if (tmpx < vc->vc_cols - 2 && (ch == SPACE || ch == 0 || (ch < 0x100 && IS_WDLM(ch))) && - get_char(vc, (u_short *)&tmp_pos + 1, &temp) > SPACE) { + get_char(vc, (u_short *)tmp_pos + 1, &temp) > SPACE) { tmp_pos += 2; tmpx++; } else { diff --git a/drivers/staging/speakup/selection.c b/drivers/staging/speakup/selection.c index a8b4d0c5ab7e..032f3264fba1 100644 --- a/drivers/staging/speakup/selection.c +++ b/drivers/staging/speakup/selection.c @@ -51,9 +51,7 @@ static void __speakup_set_selection(struct work_struct *work) goto unref; } - console_lock(); set_selection_kernel(&sel, tty); - console_unlock(); unref: tty_kref_put(tty); diff --git a/drivers/staging/vt6656/device.h b/drivers/staging/vt6656/device.h index 50e1c8918040..e2fabe818b19 100644 --- a/drivers/staging/vt6656/device.h +++ b/drivers/staging/vt6656/device.h @@ -52,6 +52,8 @@ #define RATE_AUTO 12 #define MAX_RATE 12 +#define VNT_B_RATES (BIT(RATE_1M) | BIT(RATE_2M) |\ + BIT(RATE_5M) | BIT(RATE_11M)) /* * device specific diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c index 3b94e80f1d5e..879ceef517fb 100644 --- a/drivers/staging/vt6656/dpc.c +++ b/drivers/staging/vt6656/dpc.c @@ -130,7 +130,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb, vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm); - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1; + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1; priv->current_rssi = priv->bb_pre_ed_rssi; skb_pull(skb, 8); diff --git a/drivers/staging/vt6656/int.c b/drivers/staging/vt6656/int.c index f40947955675..af215860be4c 100644 --- a/drivers/staging/vt6656/int.c +++ b/drivers/staging/vt6656/int.c @@ -99,9 +99,11 @@ static int vnt_int_report_rate(struct vnt_private *priv, u8 pkt_no, u8 tsr) info->status.rates[0].count = tx_retry; - if (!(tsr & (TSR_TMO | TSR_RETRYTMO))) { + if (!(tsr & TSR_TMO)) { info->status.rates[0].idx = idx; - info->flags |= IEEE80211_TX_STAT_ACK; + + if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) + info->flags |= IEEE80211_TX_STAT_ACK; } ieee80211_tx_status_irqsafe(priv->hw, context->skb); diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index 9cb924c54571..5e48b3ddb94c 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -1015,6 +1015,7 @@ vt6656_probe(struct usb_interface *intf, const struct usb_device_id *id) ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); ieee80211_hw_set(priv->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(priv->hw, SUPPORTS_PS); + ieee80211_hw_set(priv->hw, PS_NULLFUNC_STACK); 
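+ /*
+ * PS_NULLFUNC_STACK makes mac80211 generate the power-save Null-Data
+ * frames itself; the rxtx.c duration fix below can then reuse the
+ * frame's own duration_id field instead of synthesizing a PS-Poll
+ * style duration.
+ */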
priv->hw->max_signal = 100; diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index f9020a4f7bbf..29caba728906 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -278,11 +278,9 @@ static u16 vnt_rxtx_datahead_g(struct vnt_usb_send_context *tx_context, PK_TYPE_11B, &buf->b); /* Get Duration and TimeStamp */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration_a = dur; - buf->duration_b = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration_a = hdr->duration_id; + buf->duration_b = hdr->duration_id; } else { buf->duration_a = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); @@ -371,10 +369,8 @@ static u16 vnt_rxtx_datahead_ab(struct vnt_usb_send_context *tx_context, tx_context->pkt_type, &buf->ab); /* Get Duration and TimeStampOff */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration = hdr->duration_id; } else { buf->duration = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); @@ -815,10 +811,14 @@ int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) if (info->band == NL80211_BAND_5GHZ) { pkt_type = PK_TYPE_11A; } else { - if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) - pkt_type = PK_TYPE_11GB; - else - pkt_type = PK_TYPE_11GA; + if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { + if (priv->basic_rates & VNT_B_RATES) + pkt_type = PK_TYPE_11GB; + else + pkt_type = PK_TYPE_11GA; + } else { + pkt_type = PK_TYPE_11A; + } } } else { pkt_type = PK_TYPE_11B; diff --git a/drivers/staging/wfx/data_tx.c b/drivers/staging/wfx/data_tx.c index b13d7341f8bb..0c6a3a1a1ddf 100644 --- a/drivers/staging/wfx/data_tx.c +++ b/drivers/staging/wfx/data_tx.c @@ -282,8 +282,7 @@ void wfx_tx_policy_init(struct wfx_vif *wvif) static int wfx_alloc_link_id(struct wfx_vif *wvif, const u8 *mac) { int i, ret = 0; - unsigned long max_inactivity = 0; - unsigned long now = jiffies; + unsigned long oldest; spin_lock_bh(&wvif->ps_state_lock); for (i = 0; i < WFX_MAX_STA_IN_AP_MODE; ++i) { @@ -292,13 +291,10 @@ static int wfx_alloc_link_id(struct wfx_vif *wvif, const u8 *mac) break; } else if (wvif->link_id_db[i].status != WFX_LINK_HARD && !wvif->wdev->tx_queue_stats.link_map_cache[i + 1]) { - unsigned long inactivity = - now - wvif->link_id_db[i].timestamp; - - if (inactivity < max_inactivity) - continue; - max_inactivity = inactivity; - ret = i + 1; + if (!ret || time_after(oldest, wvif->link_id_db[i].timestamp)) { + oldest = wvif->link_id_db[i].timestamp; + ret = i + 1; + } } } diff --git a/drivers/staging/wfx/debug.c b/drivers/staging/wfx/debug.c index d17a75242365..1164aba118a1 100644 --- a/drivers/staging/wfx/debug.c +++ b/drivers/staging/wfx/debug.c @@ -145,7 +145,7 @@ static int wfx_rx_stats_show(struct seq_file *seq, void *v) st->pwr_clk_freq, st->is_ext_pwr_clk ? "yes" : "no"); seq_printf(seq, - "N. of frames: %d, PER (x10e4): %d, Throughput: %dKbps/s\n", + "Num. of frames: %d, PER (x10e4): %d, Throughput: %dKbps/s\n", st->nb_rx_frame, st->per_total, st->throughput); seq_puts(seq, " Num. 
of PER RSSI SNR CFO\n"); seq_puts(seq, " frames (x10e4) (dBm) (dB) (kHz)\n"); diff --git a/drivers/staging/wlan-ng/prism2mgmt.c b/drivers/staging/wlan-ng/prism2mgmt.c index 7350fe5d96a3..a8860d2aee68 100644 --- a/drivers/staging/wlan-ng/prism2mgmt.c +++ b/drivers/staging/wlan-ng/prism2mgmt.c @@ -959,7 +959,7 @@ int prism2mgmt_flashdl_state(struct wlandevice *wlandev, void *msgp) } } - return 0; + return result; } /*---------------------------------------------------------------- diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index b94ed4e30770..09e55ea0bf5d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1165,9 +1165,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, hdr->cmdsn, be32_to_cpu(hdr->data_length), payload_length, conn->cid); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); cmd->sense_reason = transport_lookup_cmd_lun(&cmd->se_cmd, scsilun_to_int(&hdr->lun)); @@ -2004,9 +2002,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, conn->sess->se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG, cmd->sense_buffer + 2); - if (target_get_sess_cmd(&cmd->se_cmd, true) < 0) - return iscsit_add_reject_cmd(cmd, - ISCSI_REASON_WAITING_FOR_LOGOUT, buf); + target_get_sess_cmd(&cmd->se_cmd, true); /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -4149,6 +4145,9 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4231,11 +4230,6 @@ int iscsit_close_connection( * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); - target_sess_cmd_list_set_waiting(sess->se_sess); - target_wait_for_sess_cmds(sess->se_sess); - - if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ea482d4b1f00..0ae9e60fc4d5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -666,6 +666,11 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) target_remove_from_state_list(cmd); + /* + * Clear struct se_cmd->se_lun before the handoff to FE. 
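+ * The lun_ref itself is dropped beforehand in transport_lun_remove_cmd(),
+ * so the fabric must not dereference se_lun once the command is handed
+ * off.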
+ */ + cmd->se_lun = NULL; + spin_lock_irqsave(&cmd->t_state_lock, flags); /* * Determine if frontend context caller is requesting the stopping of @@ -693,6 +698,17 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) return cmd->se_tfo->check_stop_free(cmd); } +static void transport_lun_remove_cmd(struct se_cmd *cmd) +{ + struct se_lun *lun = cmd->se_lun; + + if (!lun) + return; + + if (cmpxchg(&cmd->lun_ref_active, true, false)) + percpu_ref_put(&lun->lun_ref); +} + static void target_complete_failure_work(struct work_struct *work) { struct se_cmd *cmd = container_of(work, struct se_cmd, work); @@ -783,6 +799,8 @@ static void target_handle_abort(struct se_cmd *cmd) WARN_ON_ONCE(kref_read(&cmd->cmd_kref) == 0); + transport_lun_remove_cmd(cmd); + transport_cmd_check_stop_to_fabric(cmd); } @@ -1708,6 +1726,7 @@ static void target_complete_tmr_failure(struct work_struct *work) se_cmd->se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST; se_cmd->se_tfo->queue_tm_rsp(se_cmd); + transport_lun_remove_cmd(se_cmd); transport_cmd_check_stop_to_fabric(se_cmd); } @@ -1898,6 +1917,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, goto queue_full; check_stop: + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; @@ -2195,6 +2215,7 @@ static void transport_complete_qf(struct se_cmd *cmd) transport_handle_queue_full(cmd, cmd->se_dev, ret, false); return; } + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); } @@ -2289,6 +2310,7 @@ static void target_complete_ok_work(struct work_struct *work) if (ret) goto queue_full; + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; } @@ -2314,6 +2336,7 @@ static void target_complete_ok_work(struct work_struct *work) if (ret) goto queue_full; + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; } @@ -2349,6 +2372,7 @@ static void target_complete_ok_work(struct work_struct *work) if (ret) goto queue_full; + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; } @@ -2384,6 +2408,7 @@ static void target_complete_ok_work(struct work_struct *work) break; } + transport_lun_remove_cmd(cmd); transport_cmd_check_stop_to_fabric(cmd); return; @@ -2710,6 +2735,9 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) */ if (cmd->state_active) target_remove_from_state_list(cmd); + + if (cmd->se_lun) + transport_lun_remove_cmd(cmd); } if (aborted) cmd->free_compl = &compl; @@ -2781,9 +2809,6 @@ static void target_release_cmd_kref(struct kref *kref) struct completion *abrt_compl = se_cmd->abrt_compl; unsigned long flags; - if (se_cmd->lun_ref_active) - percpu_ref_put(&se_cmd->se_lun->lun_ref); - if (se_sess) { spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); list_del_init(&se_cmd->se_cmd_list); diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c index 5825ac581f56..680f1a070606 100644 --- a/drivers/thermal/broadcom/brcmstb_thermal.c +++ b/drivers/thermal/broadcom/brcmstb_thermal.c @@ -49,7 +49,7 @@ #define AVS_TMON_TP_TEST_ENABLE 0x20 /* Default coefficients */ -#define AVS_TMON_TEMP_SLOPE -487 +#define AVS_TMON_TEMP_SLOPE 487 #define AVS_TMON_TEMP_OFFSET 410040 /* HW related temperature constants */ @@ -108,23 +108,12 @@ struct brcmstb_thermal_priv { struct thermal_zone_device *thermal; }; -static void avs_tmon_get_coeffs(struct thermal_zone_device *tz, int *slope, - int *offset) -{ - *slope = thermal_zone_get_slope(tz); - *offset = 
thermal_zone_get_offset(tz); -} - /* Convert a HW code to a temperature reading (millidegree celsius) */ static inline int avs_tmon_code_to_temp(struct thermal_zone_device *tz, u32 code) { - const int val = code & AVS_TMON_TEMP_MASK; - int slope, offset; - - avs_tmon_get_coeffs(tz, &slope, &offset); - - return slope * val + offset; + return (AVS_TMON_TEMP_OFFSET - + (int)((code & AVS_TMON_TEMP_MAX) * AVS_TMON_TEMP_SLOPE)); } /* @@ -136,20 +125,18 @@ static inline int avs_tmon_code_to_temp(struct thermal_zone_device *tz, static inline u32 avs_tmon_temp_to_code(struct thermal_zone_device *tz, int temp, bool low) { - int slope, offset; - if (temp < AVS_TMON_TEMP_MIN) - return AVS_TMON_TEMP_MAX; /* Maximum code value */ - - avs_tmon_get_coeffs(tz, &slope, &offset); + return AVS_TMON_TEMP_MAX; /* Maximum code value */ - if (temp >= offset) + if (temp >= AVS_TMON_TEMP_OFFSET) return 0; /* Minimum code value */ if (low) - return (u32)(DIV_ROUND_UP(offset - temp, abs(slope))); + return (u32)(DIV_ROUND_UP(AVS_TMON_TEMP_OFFSET - temp, + AVS_TMON_TEMP_SLOPE)); else - return (u32)((offset - temp) / abs(slope)); + return (u32)((AVS_TMON_TEMP_OFFSET - temp) / + AVS_TMON_TEMP_SLOPE); } static int brcmstb_get_temp(void *data, int *temp) diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 372dbbaaafb8..21d4d6e6409a 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -152,8 +152,8 @@ static irqreturn_t prcmu_high_irq_handler(int irq, void *irq_data) db8500_thermal_update_config(th, idx, THERMAL_TREND_RAISING, next_low, next_high); - dev_info(&th->tz->device, - "PRCMU set max %ld, min %ld\n", next_high, next_low); + dev_dbg(&th->tz->device, + "PRCMU set max %ld, min %ld\n", next_high, next_low); } else if (idx == num_points - 1) /* So we roof out 1 degree over the max point */ th->interpolated_temp = db8500_thermal_points[idx] + 1; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index ca86a8e09c77..f0f77da6ca26 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -274,6 +274,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val, return ret; } +static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val, + size_t bytes) +{ + return -EPERM; +} + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -319,6 +325,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id, config.read_only = true; } else { config.name = "nvm_non_active"; + config.reg_read = tb_switch_nvm_no_read; config.reg_write = tb_switch_nvm_write; config.root_only = true; } @@ -841,7 +848,7 @@ static bool tb_port_is_width_supported(struct tb_port *port, int width) ret = tb_port_read(port, &phy, TB_CFG_PORT, port->cap_phy + LANE_ADP_CS_0, 1); if (ret) - return ret; + return false; widths = (phy & LANE_ADP_CS_0_SUPPORTED_WIDTH_MASK) >> LANE_ADP_CS_0_SUPPORTED_WIDTH_SHIFT; diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index ce5309d00280..0f64a10ba51f 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -18,6 +18,7 @@ #include #include #include +#include static bool is_registered; static DEFINE_IDA(ctrl_ida); @@ -630,6 +631,15 @@ static int acpi_serdev_check_resources(struct serdev_controller *ctrl, if (ret) return ret; + /* + * Apple machines provide an empty resource template, so on those + * machines just look for immediate children with a "baud" property + * (from the _DSM method) 
instead. + */ + if (!lookup.controller_handle && x86_apple_machine && + !acpi_dev_get_property(adev, "baud", ACPI_TYPE_BUFFER, NULL)) + acpi_get_parent(adev->handle, &lookup.controller_handle); + /* Make sure controller and ResourceSource handle match */ if (ACPI_HANDLE(ctrl->dev.parent) != lookup.controller_handle) return -ENODEV; diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index d1cdd2ab8b4c..d367803e2044 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -265,7 +265,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { - const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; int ret; @@ -284,7 +283,6 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; - old_ops = port->client_ops; port->client_ops = &client_ops; port->client_data = ctrl; @@ -297,7 +295,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, err_reset_data: port->client_data = NULL; - port->client_ops = old_ops; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return ERR_PTR(ret); @@ -312,8 +310,8 @@ int serdev_tty_port_unregister(struct tty_port *port) return -ENODEV; serdev_controller_remove(ctrl); - port->client_ops = NULL; port->client_data = NULL; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return 0; diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 6e67fd89445a..0ed5404f35d6 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -449,7 +449,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.line = rc; port.port.irq = irq_of_parse_and_map(np, 0); - port.port.irqflags = IRQF_SHARED; port.port.handle_irq = aspeed_vuart_handle_irq; port.port.iotype = UPIO_MEM; port.port.type = PORT_16550A; diff --git a/drivers/tty/serial/8250/8250_bcm2835aux.c b/drivers/tty/serial/8250/8250_bcm2835aux.c index 8ce700c1a7fc..4997c519ebb3 100644 --- a/drivers/tty/serial/8250/8250_bcm2835aux.c +++ b/drivers/tty/serial/8250/8250_bcm2835aux.c @@ -113,7 +113,7 @@ static int bcm2835aux_serial_remove(struct platform_device *pdev) { struct bcm2835aux_data *data = platform_get_drvdata(pdev); - serial8250_unregister_port(data->uart.port.line); + serial8250_unregister_port(data->line); clk_disable_unprepare(data->clk); return 0; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index e682390ce0de..28bdbd7b4ab2 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -174,7 +174,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) struct hlist_head *h; struct hlist_node *n; struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? 
IRQF_SHARED : 0; + int ret; mutex_lock(&hash_mutex); @@ -209,9 +209,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up) INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, up->port.name, i); + up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 108cd55f9c4d..405370c6eee5 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -25,6 +25,14 @@ #include "8250.h" +#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052 +#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d +#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c +#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8 +#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2 +#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db +#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -677,6 +685,22 @@ static int __maybe_unused exar_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume); +static const struct exar8250_board acces_com_2x = { + .num_ports = 2, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_4x = { + .num_ports = 4, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_8x = { + .num_ports = 8, + .setup = pci_xr17c154_setup, +}; + + static const struct exar8250_board pbn_fastcom335_2 = { .num_ports = 2, .setup = pci_fastcom335_setup, @@ -745,6 +769,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = { } static const struct pci_device_id exar_pci_tbl[] = { + EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x), + + CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect), CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect), CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect), diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 92fbf46ce3bd..3205c83577e0 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -202,7 +202,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->type = type; port->uartclk = clk; - port->irqflags |= IRQF_SHARED; if (of_property_read_bool(np, "no-loopback-test")) port->flags |= UPF_SKIP_TEST; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 90655910b0c7..5741b3822cf6 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2178,6 +2178,10 @@ int serial8250_do_startup(struct uart_port *port) } } + /* Check if we need to have shared IRQs */ + if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) + up->port.irqflags |= IRQF_SHARED; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; /* diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index 3bdd56a1021b..ea12f10610b6 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -286,6 +286,10 @@ static void 
ar933x_uart_set_termios(struct uart_port *port, ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready override */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* reenable the UART */ ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, @@ -418,6 +422,10 @@ static int ar933x_uart_startup(struct uart_port *port) ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready override */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* Enable RX interrupts */ up->ier = AR933X_UART_INT_RX_VALID; ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 1ba9bc667e13..8a909d556185 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -574,7 +574,8 @@ static void atmel_stop_tx(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); if (atmel_uart_is_half_duplex(port)) - atmel_start_rx(port); + if (!atomic_read(&atmel_port->tasklet_shutdown)) + atmel_start_rx(port); } diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 4e128d19e0ad..8a69ec282a43 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -268,6 +268,7 @@ struct lpuart_port { int rx_dma_rng_buf_len; unsigned int dma_tx_nents; wait_queue_head_t dma_wait; + bool id_allocated; }; struct lpuart_soc_data { @@ -2429,19 +2430,6 @@ static int lpuart_probe(struct platform_device *pdev) if (!sport) return -ENOMEM; - ret = of_alias_get_id(np, "serial"); - if (ret < 0) { - ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL); - if (ret < 0) { - dev_err(&pdev->dev, "port line is full, add device failed\n"); - return ret; - } - } - if (ret >= ARRAY_SIZE(lpuart_ports)) { - dev_err(&pdev->dev, "serial%d out of range\n", ret); - return -EINVAL; - } - sport->port.line = ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); sport->port.membase = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(sport->port.membase)) @@ -2485,9 +2473,25 @@ static int lpuart_probe(struct platform_device *pdev) } } + ret = of_alias_get_id(np, "serial"); + if (ret < 0) { + ret = ida_simple_get(&fsl_lpuart_ida, 0, UART_NR, GFP_KERNEL); + if (ret < 0) { + dev_err(&pdev->dev, "port line is full, add device failed\n"); + return ret; + } + sport->id_allocated = true; + } + if (ret >= ARRAY_SIZE(lpuart_ports)) { + dev_err(&pdev->dev, "serial%d out of range\n", ret); + ret = -EINVAL; + goto failed_out_of_range; + } + sport->port.line = ret; + ret = lpuart_enable_clks(sport); if (ret) - return ret; + goto failed_clock_enable; sport->port.uartclk = lpuart_get_baud_clk_rate(sport); lpuart_ports[sport->port.line] = sport; @@ -2537,6 +2541,10 @@ static int lpuart_probe(struct platform_device *pdev) failed_attach_port: failed_irq_request: lpuart_disable_clks(sport); +failed_clock_enable: +failed_out_of_range: + if (sport->id_allocated) + ida_simple_remove(&fsl_lpuart_ida, sport->port.line); return ret; } @@ -2546,7 +2554,8 @@ static int lpuart_remove(struct platform_device *pdev) uart_remove_one_port(&lpuart_reg, &sport->port); - ida_simple_remove(&fsl_lpuart_ida, sport->port.line); + if (sport->id_allocated) + ida_simple_remove(&fsl_lpuart_ida, sport->port.line); lpuart_disable_clks(sport); diff --git 
a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index a9e20e6c63ad..0357fad48247 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -603,7 +603,7 @@ static void imx_uart_dma_tx(struct imx_port *sport) sport->tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes); } else { @@ -700,22 +700,33 @@ static void imx_uart_start_tx(struct uart_port *port) } } -static irqreturn_t imx_uart_rtsint(int irq, void *dev_id) +static irqreturn_t __imx_uart_rtsint(int irq, void *dev_id) { struct imx_port *sport = dev_id; u32 usr1; - spin_lock(&sport->port.lock); - imx_uart_writel(sport, USR1_RTSD, USR1); usr1 = imx_uart_readl(sport, USR1) & USR1_RTSS; uart_handle_cts_change(&sport->port, !!usr1); wake_up_interruptible(&sport->port.state->port.delta_msr_wait); - spin_unlock(&sport->port.lock); return IRQ_HANDLED; } +static irqreturn_t imx_uart_rtsint(int irq, void *dev_id) +{ + struct imx_port *sport = dev_id; + irqreturn_t ret; + + spin_lock(&sport->port.lock); + + ret = __imx_uart_rtsint(irq, dev_id); + + spin_unlock(&sport->port.lock); + + return ret; +} + static irqreturn_t imx_uart_txint(int irq, void *dev_id) { struct imx_port *sport = dev_id; @@ -726,14 +737,12 @@ static irqreturn_t imx_uart_txint(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t imx_uart_rxint(int irq, void *dev_id) +static irqreturn_t __imx_uart_rxint(int irq, void *dev_id) { struct imx_port *sport = dev_id; unsigned int rx, flg, ignored = 0; struct tty_port *port = &sport->port.state->port; - spin_lock(&sport->port.lock); - while (imx_uart_readl(sport, USR2) & USR2_RDR) { u32 usr2; @@ -792,11 +801,25 @@ static irqreturn_t imx_uart_rxint(int irq, void *dev_id) } out: - spin_unlock(&sport->port.lock); tty_flip_buffer_push(port); + return IRQ_HANDLED; } +static irqreturn_t imx_uart_rxint(int irq, void *dev_id) +{ + struct imx_port *sport = dev_id; + irqreturn_t ret; + + spin_lock(&sport->port.lock); + + ret = __imx_uart_rxint(irq, dev_id); + + spin_unlock(&sport->port.lock); + + return ret; +} + static void imx_uart_clear_rx_errors(struct imx_port *sport); /* @@ -855,6 +878,8 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; + spin_lock(&sport->port.lock); + usr1 = imx_uart_readl(sport, USR1); usr2 = imx_uart_readl(sport, USR2); ucr1 = imx_uart_readl(sport, UCR1); @@ -888,27 +913,25 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) usr2 &= ~USR2_ORE; if (usr1 & (USR1_RRDY | USR1_AGTIM)) { - imx_uart_rxint(irq, dev_id); + __imx_uart_rxint(irq, dev_id); ret = IRQ_HANDLED; } if ((usr1 & USR1_TRDY) || (usr2 & USR2_TXDC)) { - imx_uart_txint(irq, dev_id); + imx_uart_transmit_buffer(sport); ret = IRQ_HANDLED; } if (usr1 & USR1_DTRD) { imx_uart_writel(sport, USR1_DTRD, USR1); - spin_lock(&sport->port.lock); imx_uart_mctrl_check(sport); - spin_unlock(&sport->port.lock); ret = IRQ_HANDLED; } if (usr1 & USR1_RTSD) { - imx_uart_rtsint(irq, dev_id); + __imx_uart_rtsint(irq, dev_id); ret = IRQ_HANDLED; } @@ -923,6 +946,8 @@ static irqreturn_t imx_uart_int(int irq, void *dev_id) ret = IRQ_HANDLED; } + spin_unlock(&sport->port.lock); + return ret; } diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index c12a12556339..4e9a590712cb 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -851,7 +851,7 @@ static 
int mvebu_uart_probe(struct platform_device *pdev) port->membase = devm_ioremap_resource(&pdev->dev, reg); if (IS_ERR(port->membase)) - return -PTR_ERR(port->membase); + return PTR_ERR(port->membase); mvuart = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart), GFP_KERNEL); diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index ff63728a95f4..ebace5ad175c 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -128,6 +128,7 @@ static int handle_rx_console(struct uart_port *uport, u32 bytes, bool drop); static int handle_rx_uart(struct uart_port *uport, u32 bytes, bool drop); static unsigned int qcom_geni_serial_tx_empty(struct uart_port *port); static void qcom_geni_serial_stop_rx(struct uart_port *uport); +static void qcom_geni_serial_handle_rx(struct uart_port *uport, bool drop); static const unsigned long root_freq[] = {7372800, 14745600, 19200000, 29491200, 32000000, 48000000, 64000000, 80000000, @@ -618,7 +619,7 @@ static void qcom_geni_serial_stop_rx(struct uart_port *uport) u32 irq_en; u32 status; struct qcom_geni_serial_port *port = to_dev_port(uport, uport); - u32 irq_clear = S_CMD_DONE_EN; + u32 s_irq_status; irq_en = readl(uport->membase + SE_GENI_S_IRQ_EN); irq_en &= ~(S_RX_FIFO_WATERMARK_EN | S_RX_FIFO_LAST_EN); @@ -634,10 +635,19 @@ static void qcom_geni_serial_stop_rx(struct uart_port *uport) return; geni_se_cancel_s_cmd(&port->se); - qcom_geni_serial_poll_bit(uport, SE_GENI_S_CMD_CTRL_REG, - S_GENI_CMD_CANCEL, false); + qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS, + S_CMD_CANCEL_EN, true); + /* + * If timeout occurs secondary engine remains active + * and Abort sequence is executed. + */ + s_irq_status = readl(uport->membase + SE_GENI_S_IRQ_STATUS); + /* Flush the Rx buffer */ + if (s_irq_status & S_RX_FIFO_LAST_EN) + qcom_geni_serial_handle_rx(uport, true); + writel(s_irq_status, uport->membase + SE_GENI_S_IRQ_CLEAR); + status = readl(uport->membase + SE_GENI_STATUS); - writel(irq_clear, uport->membase + SE_GENI_S_IRQ_CLEAR); if (status & S_GENI_CMD_ACTIVE) qcom_geni_serial_abort_rx(uport); } diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index e8a9047de451..36f1a4d870eb 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1334,10 +1334,10 @@ static void throttle(struct tty_struct * tty) DBGINFO(("%s throttle\n", info->device_name)); if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1359,10 +1359,10 @@ static void unthrottle(struct tty_struct * tty) else send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2560,8 +2560,8 @@ static void change_params(struct slgt_info *info) info->read_status_mask = IRQ_RXOVER; if (I_INPCK(info->port.tty)) info->read_status_mask |= MASK_PARITY | MASK_FRAMING; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask |= MASK_BREAK; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask |= MASK_BREAK; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; if (I_IGNBRK(info->port.tty)) { @@ -3192,7 +3192,7 @@ static int 
tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3203,7 +3203,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 1 : 0; } @@ -3218,7 +3218,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index fcb91bf7a15b..54b897a646d0 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1453,10 +1453,10 @@ static void throttle(struct tty_struct * tty) if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1482,10 +1482,10 @@ static void unthrottle(struct tty_struct * tty) send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2470,7 +2470,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (status & SerialSignal_CTS) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx start..."); - info->port.tty->hw_stopped = 0; + info->port.tty->hw_stopped = 0; tx_start(info); info->pending_bh |= BH_TRANSMIT; return; @@ -2479,7 +2479,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (!(status & SerialSignal_CTS)) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx stop..."); - info->port.tty->hw_stopped = 1; + info->port.tty->hw_stopped = 1; tx_stop(info); } } @@ -2806,8 +2806,8 @@ static void change_params(SLMP_INFO *info) info->read_status_mask2 = OVRN; if (I_INPCK(info->port.tty)) info->read_status_mask2 |= PE | FRME; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask1 |= BRKD; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask1 |= BRKD; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask2 |= PE | FRME; if (I_IGNBRK(info->port.tty)) { @@ -3177,7 +3177,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) | @@ -3215,7 +3215,7 @@ static int tiocmset(struct tty_struct *tty, info->serial_signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; @@ -3227,7 +3227,7 @@ static int carrier_raised(struct tty_port *port) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->serial_signals & SerialSignal_DCD) ? 
1 : 0; @@ -3243,7 +3243,7 @@ static void dtr_rts(struct tty_port *port, int on) info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR; else info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index d9f54c7d94f2..eb8c3e559363 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2734,9 +2734,11 @@ static int compat_tty_tiocgserial(struct tty_struct *tty, struct serial_struct32 v32; struct serial_struct v; int err; - memset(&v, 0, sizeof(struct serial_struct)); - if (!tty->ops->set_serial) + memset(&v, 0, sizeof(v)); + memset(&v32, 0, sizeof(v32)); + + if (!tty->ops->get_serial) return -ENOTTY; err = tty->ops->get_serial(tty, &v); if (!err) { diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 044c3cbdcfa4..ea80bf872f54 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -52,10 +52,11 @@ static void tty_port_default_wakeup(struct tty_port *port) } } -static const struct tty_port_client_operations default_client_ops = { +const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .write_wakeup = tty_port_default_wakeup, }; +EXPORT_SYMBOL_GPL(tty_port_default_client_ops); void tty_port_init(struct tty_port *port) { @@ -68,7 +69,7 @@ void tty_port_init(struct tty_port *port) spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; - port->client_ops = &default_client_ops; + port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 78732feaf65b..d7d2e4b844bc 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,8 @@ #include #include +#include + /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') @@ -43,6 +46,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; static char *sel_buffer; +static DEFINE_MUTEX(sel_lock); /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ @@ -177,14 +181,14 @@ int set_selection_user(const struct tiocl_selection __user *sel, return set_selection_kernel(&v, tty); } -int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) +static int __set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) { struct vc_data *vc = vc_cons[fg_console].d; int new_sel_start, new_sel_end, spc; char *bp, *obp; int i, ps, pe, multiplier; u32 c; - int mode; + int mode, ret = 0; poke_blanked_console(); @@ -332,7 +336,21 @@ int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) } } sel_buffer_lth = bp - sel_buffer; - return 0; + + return ret; +} + +int set_selection_kernel(struct tiocl_selection *v, struct tty_struct *tty) +{ + int ret; + + mutex_lock(&sel_lock); + console_lock(); + ret = __set_selection_kernel(v, tty); + console_unlock(); + mutex_unlock(&sel_lock); + + return ret; } EXPORT_SYMBOL_GPL(set_selection_kernel); @@ -350,6 +368,7 @@ int paste_selection(struct tty_struct *tty) unsigned int count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + int ret = 0; console_lock(); poke_blanked_console(); @@ -361,10 +380,17 @@ int 
paste_selection(struct tty_struct *tty) tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); + mutex_lock(&sel_lock); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) { + ret = -EINTR; + break; + } if (tty_throttled(tty)) { + mutex_unlock(&sel_lock); schedule(); + mutex_lock(&sel_lock); continue; } __set_current_state(TASK_RUNNING); @@ -373,11 +399,12 @@ int paste_selection(struct tty_struct *tty) count); pasted += count; } + mutex_unlock(&sel_lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); - return 0; + return ret; } EXPORT_SYMBOL_GPL(paste_selection); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 34aa39d1aed9..e9e27ba69d5d 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -936,10 +936,21 @@ static void flush_scrollback(struct vc_data *vc) WARN_CONSOLE_UNLOCKED(); set_origin(vc); - if (vc->vc_sw->con_flush_scrollback) + if (vc->vc_sw->con_flush_scrollback) { vc->vc_sw->con_flush_scrollback(vc); - else + } else if (con_is_visible(vc)) { + /* + * When no con_flush_scrollback method is provided then the + * legacy way for flushing the scrollback buffer is to use + * a side effect of the con_switch method. We do it only on + * the foreground console as background consoles have no + * scrollback buffers in that case and we obviously don't + * want to switch to them. + */ + hide_cursor(vc); vc->vc_sw->con_switch(vc); + set_cursor(vc); + } } /* @@ -3035,10 +3046,8 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - console_lock(); ret = set_selection_user((struct tiocl_selection __user *)(p+1), tty); - console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 8b0ed139592f..ee6c91ef1f6c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -876,15 +876,20 @@ int vt_ioctl(struct tty_struct *tty, return -EINVAL; for (i = 0; i < MAX_NR_CONSOLES; i++) { + struct vc_data *vcp; + if (!vc_cons[i].d) continue; console_lock(); - if (v.v_vlin) - vc_cons[i].d->vc_scan_lines = v.v_vlin; - if (v.v_clin) - vc_cons[i].d->vc_font.height = v.v_clin; - vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, v.v_cols, v.v_rows); + vcp = vc_cons[i].d; + if (vcp) { + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_font.height = v.v_clin; + vcp->vc_resize_user = 1; + vc_resize(vcp, v.v_cols, v.v_rows); + } console_unlock(); } break; diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index 81c88f7bbbcb..f6ab3f28c838 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -132,11 +132,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) if (irq_on) { if (test_and_clear_bit(0, &priv->flags)) enable_irq(dev_info->irq); + spin_unlock_irqrestore(&priv->lock, flags); } else { - if (!test_and_set_bit(0, &priv->flags)) + if (!test_and_set_bit(0, &priv->flags)) { + spin_unlock_irqrestore(&priv->lock, flags); disable_irq(dev_info->irq); + } } - spin_unlock_irqrestore(&priv->lock, flags); return 0; } diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 02f6ca2cb1ba..f624cc87cbab 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -2107,7 +2107,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, /* Update ring 
only if removed request is on pending_req_list list */ if (req_on_hw_ring) { link_trb->buffer = TRB_BUFFER(priv_ep->trb_pool_dma + - (priv_req->start_trb * TRB_SIZE)); + ((priv_req->end_trb + 1) * TRB_SIZE)); link_trb->control = (link_trb->control & TRB_CYCLE) | TRB_TYPE(TRB_LINK) | TRB_CHAIN; @@ -2152,11 +2152,21 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) { struct cdns3_device *priv_dev = priv_ep->cdns3_dev; struct usb_request *request; + struct cdns3_request *priv_req; + struct cdns3_trb *trb = NULL; int ret; int val; trace_cdns3_halt(priv_ep, 0, 0); + request = cdns3_next_request(&priv_ep->pending_req_list); + if (request) { + priv_req = to_cdns3_request(request); + trb = priv_req->trb; + if (trb) + trb->control = trb->control ^ TRB_CYCLE; + } + writel(EP_CMD_CSTALL | EP_CMD_EPRST, &priv_dev->regs->ep_cmd); /* wait for EPRST cleared */ @@ -2167,10 +2177,11 @@ int __cdns3_gadget_ep_clear_halt(struct cdns3_endpoint *priv_ep) priv_ep->flags &= ~(EP_STALLED | EP_STALL_PENDING); - request = cdns3_next_request(&priv_ep->pending_req_list); - - if (request) + if (request) { + if (trb) + trb->control = trb->control ^ TRB_CYCLE; cdns3_rearm_transfer(priv_ep, 1); + } cdns3_start_all_request(priv_dev, priv_ep); return ret; diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index ffaf46f5d062..4c4ac30db498 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1530,18 +1530,19 @@ static const struct usb_ep_ops usb_ep_ops = { static void ci_hdrc_gadget_connect(struct usb_gadget *_gadget, int is_active) { struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget); - unsigned long flags; if (is_active) { pm_runtime_get_sync(&_gadget->dev); hw_device_reset(ci); - spin_lock_irqsave(&ci->lock, flags); + spin_lock_irq(&ci->lock); if (ci->driver) { hw_device_state(ci, ci->ep0out->qh.dma); usb_gadget_set_state(_gadget, USB_STATE_POWERED); + spin_unlock_irq(&ci->lock); usb_udc_vbus_handler(_gadget, true); + } else { + spin_unlock_irq(&ci->lock); } - spin_unlock_irqrestore(&ci->lock, flags); } else { usb_udc_vbus_handler(_gadget, false); if (ci->driver) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 62f4fb9b362f..47f09a6ce7bd 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -896,10 +896,10 @@ static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss) ss->xmit_fifo_size = acm->writesize; ss->baud_base = le32_to_cpu(acm->line.dwDTERate); - ss->close_delay = acm->port.close_delay / 10; + ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : - acm->port.closing_wait / 10; + jiffies_to_msecs(acm->port.closing_wait) / 10; return 0; } @@ -907,24 +907,32 @@ static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct acm *acm = tty->driver_data; unsigned int closing_wait, close_delay; + unsigned int old_closing_wait, old_close_delay; int retval = 0; - close_delay = ss->close_delay * 10; + close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait == ASYNC_CLOSING_WAIT_NONE ? - ASYNC_CLOSING_WAIT_NONE : ss->closing_wait * 10; + ASYNC_CLOSING_WAIT_NONE : + msecs_to_jiffies(ss->closing_wait * 10); + + /* we must redo the rounding here, so that the values match */ + old_close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; + old_closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? 
+ ASYNC_CLOSING_WAIT_NONE : + jiffies_to_msecs(acm->port.closing_wait) / 10; mutex_lock(&acm->port.mutex); - if (!capable(CAP_SYS_ADMIN)) { - if ((close_delay != acm->port.close_delay) || - (closing_wait != acm->port.closing_wait)) + if ((ss->close_delay != old_close_delay) || + (ss->closing_wait != old_closing_wait)) { + if (!capable(CAP_SYS_ADMIN)) retval = -EPERM; - else - retval = -EOPNOTSUPP; - } else { - acm->port.close_delay = close_delay; - acm->port.closing_wait = closing_wait; - } + else { + acm->port.close_delay = close_delay; + acm->port.closing_wait = closing_wait; + } + } else + retval = -EOPNOTSUPP; mutex_unlock(&acm->port.mutex); return retval; diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 26bc05e48d8a..7df22bcefa9d 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -256,6 +256,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, struct usb_host_interface *ifp, int num_ep, unsigned char *buffer, int size) { + struct usb_device *udev = to_usb_device(ddev); unsigned char *buffer0 = buffer; struct usb_endpoint_descriptor *d; struct usb_host_endpoint *endpoint; @@ -297,6 +298,16 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, goto skip_to_next_endpoint_or_interface_descriptor; } + /* Ignore blacklisted endpoints */ + if (udev->quirks & USB_QUIRK_ENDPOINT_BLACKLIST) { + if (usb_endpoint_is_blacklisted(udev, ifp, d)) { + dev_warn(ddev, "config %d interface %d altsetting %d has a blacklisted endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, + d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; + } + } + endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; ++ifp->desc.bNumEndpoints; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3405b146edc9..54cd8ef795ec 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -38,7 +38,9 @@ #include "otg_whitelist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 +#define USB_VENDOR_SMSC 0x0424 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 +#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ @@ -986,13 +988,17 @@ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; + int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); - usb_autopm_get_interface(intf); + ret = usb_autopm_get_interface(intf); + if (ret < 0) + return ret; + set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); @@ -1217,11 +1223,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) #ifdef CONFIG_PM udev->reset_resume = 1; #endif - /* Don't set the change_bits when the device - * was powered off. 
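A note on the cdc-acm hunks above: serial_struct expresses close_delay and closing_wait in centiseconds while tty_port stores jiffies, so the patch converts through milliseconds in both directions and re-rounds the stored values before the permission check, ensuring an unchanged round-trip through the ABI is not mistaken for a privileged change. A minimal sketch of the conversion pair (helper names hypothetical, not part of the patch):

static unsigned long sketch_cs_to_jiffies(unsigned int cs)
{
        return msecs_to_jiffies(cs * 10);       /* centiseconds -> ms -> jiffies */
}

static unsigned int sketch_jiffies_to_cs(unsigned long j)
{
        return jiffies_to_msecs(j) / 10;        /* jiffies -> ms -> centiseconds */
}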
- */ - if (test_bit(port1, hub->power_bits)) - set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ @@ -1731,6 +1732,10 @@ static void hub_disconnect(struct usb_interface *intf) kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); + + if (hub->quirk_disable_autosuspend) + usb_autopm_put_interface(intf); + kref_put(&hub->kref, hub_release); } @@ -1863,6 +1868,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; + if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { + hub->quirk_disable_autosuspend = 1; + usb_autopm_get_interface_no_resume(intf); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) return 0; @@ -5599,6 +5609,10 @@ static void hub_event(struct work_struct *work) } static const struct usb_device_id hub_id_table[] = { + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, + .idVendor = USB_VENDOR_SMSC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index a9e24e4b8df1..a97dd1ba964e 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -61,6 +61,7 @@ struct usb_hub { unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; + unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index bbbb35fa639f..235a7c645503 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -213,7 +213,10 @@ static int usb_port_runtime_resume(struct device *dev) if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { @@ -266,7 +269,10 @@ static int usb_port_runtime_suspend(struct device *dev) if (usb_port_block_power_off) return -EBUSY; - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6b6413073584..da30b5664ff3 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -231,6 +231,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Screen Share */ + { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -354,6 +357,10 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0904, 0x6103), .driver_info = USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL }, + /* Sound Devices USBPre2 */ + { USB_DEVICE(0x0926, 0x0202), .driver_info = + USB_QUIRK_ENDPOINT_BLACKLIST }, + /* Keytouch QWERTY Panel keyboard */ { USB_DEVICE(0x0926, 0x3333), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, @@ -371,6 +378,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), 
.driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek hub in Dell WD19 (Type-C) */ + { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + + /* Generic RTL8153 based ethernet adapters */ + { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -445,6 +458,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; @@ -472,6 +488,39 @@ static const struct usb_device_id usb_amd_resume_quirk_list[] = { { } /* terminating entry must be last */ }; +/* + * Entries for blacklisted endpoints that should be ignored when parsing + * configuration descriptors. + * + * Matched for devices with USB_QUIRK_ENDPOINT_BLACKLIST. + */ +static const struct usb_device_id usb_endpoint_blacklist[] = { + { USB_DEVICE_INTERFACE_NUMBER(0x0926, 0x0202, 1), .driver_info = 0x85 }, + { } +}; + +bool usb_endpoint_is_blacklisted(struct usb_device *udev, + struct usb_host_interface *intf, + struct usb_endpoint_descriptor *epd) +{ + const struct usb_device_id *id; + unsigned int address; + + for (id = usb_endpoint_blacklist; id->match_flags; ++id) { + if (!usb_match_device(udev, id)) + continue; + + if (!usb_match_one_id_intf(udev, intf, id)) + continue; + + address = id->driver_info; + if (address == epd->bEndpointAddress) + return true; + } + + return false; +} + static bool usb_match_any_interface(struct usb_device *udev, const struct usb_device_id *id) { diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index cf4783cf661a..3ad0ee57e859 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -37,6 +37,9 @@ extern void usb_authorize_interface(struct usb_interface *); extern void usb_detect_quirks(struct usb_device *udev); extern void usb_detect_interface_quirks(struct usb_device *udev); extern void usb_release_quirk_list(void); +extern bool usb_endpoint_is_blacklisted(struct usb_device *udev, + struct usb_host_interface *intf, + struct usb_endpoint_descriptor *epd); extern int usb_remove_device(struct usb_device *udev); extern int usb_get_device_descriptor(struct usb_device *dev, diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 6be10e496e10..7fd0900a9cb0 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1083,11 +1083,6 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg, else packets = 1; /* send one packet if length is zero. */ - if (hs_ep->isochronous && length > (hs_ep->mc * hs_ep->ep.maxpacket)) { - dev_err(hsotg->dev, "req length > maxpacket*mc\n"); - return; - } - if (dir_in && index != 0) if (hs_ep->isochronous) epsize = DXEPTSIZ_MC(packets); @@ -1391,6 +1386,13 @@ static int dwc2_hsotg_ep_queue(struct usb_ep *ep, struct usb_request *req, req->actual = 0; req->status = -EINPROGRESS; + /* Don't queue ISOC request if length greater than mps*mc */ + if (hs_ep->isochronous && + req->length > (hs_ep->mc * hs_ep->ep.maxpacket)) { + dev_err(hs->dev, "req length > maxpacket*mc\n"); + return -EINVAL; + } + /* In DDMA mode for ISOC's don't queue request if length greater * than descriptor limits. 
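The dwc2 change above moves the ISOC length validation from dwc2_hsotg_start_req(), where an oversized request was merely logged and dropped, into dwc2_hsotg_ep_queue(), where the caller now gets -EINVAL back. The bound is the endpoint's max packet size times its high-bandwidth multiplier; a reduced sketch of the guard (function name hypothetical):

/* Sketch: reject an ISOC request longer than mc * maxpacket at queue time. */
static int sketch_check_isoc_len(unsigned int length, unsigned int maxpacket,
                                 unsigned int mc)
{
        if (length > mc * maxpacket)
                return -EINVAL;         /* caller sees the failure immediately */
        return 0;
}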
*/ @@ -1632,6 +1634,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg, struct dwc2_hsotg_ep *ep0 = hsotg->eps_out[0]; struct dwc2_hsotg_ep *ep; __le16 reply; + u16 status; int ret; dev_dbg(hsotg->dev, "%s: USB_REQ_GET_STATUS\n", __func__); @@ -1643,11 +1646,10 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg, switch (ctrl->bRequestType & USB_RECIP_MASK) { case USB_RECIP_DEVICE: - /* - * bit 0 => self powered - * bit 1 => remote wakeup - */ - reply = cpu_to_le16(0); + status = 1 << USB_DEVICE_SELF_POWERED; + status |= hsotg->remote_wakeup_allowed << + USB_DEVICE_REMOTE_WAKEUP; + reply = cpu_to_le16(status); break; case USB_RECIP_INTERFACE: @@ -1758,7 +1760,10 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, case USB_RECIP_DEVICE: switch (wValue) { case USB_DEVICE_REMOTE_WAKEUP: - hsotg->remote_wakeup_allowed = 1; + if (set) + hsotg->remote_wakeup_allowed = 1; + else + hsotg->remote_wakeup_allowed = 0; break; case USB_DEVICE_TEST_MODE: @@ -1768,16 +1773,17 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg, return -EINVAL; hsotg->test_mode = wIndex >> 8; - ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); - if (ret) { - dev_err(hsotg->dev, - "%s: failed to send reply\n", __func__); - return ret; - } break; default: return -ENOENT; } + + ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0); + if (ret) { + dev_err(hsotg->dev, + "%s: failed to send reply\n", __func__); + return ret; + } break; case USB_RECIP_ENDPOINT: @@ -4056,11 +4062,12 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, * a unique tx-fifo even if it is non-periodic. */ if (dir_in && hsotg->dedicated_fifos) { + unsigned fifo_count = dwc2_hsotg_tx_fifo_count(hsotg); u32 fifo_index = 0; u32 fifo_size = UINT_MAX; size = hs_ep->ep.maxpacket * hs_ep->mc; - for (i = 1; i < hsotg->num_of_eps; ++i) { + for (i = 1; i <= fifo_count; ++i) { if (hsotg->fifo_map & (1 << i)) continue; val = dwc2_readl(hsotg, DPTXFSIZN(i)); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index f561c6c9e8a9..1d85c42b9c67 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1246,6 +1246,9 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) /* do nothing */ break; } + + /* de-assert DRVVBUS for HOST and OTG mode */ + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); } static void dwc3_get_properties(struct dwc3 *dwc) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 1c8b349379af..77c4a9abe365 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -688,7 +688,9 @@ struct dwc3_ep { #define DWC3_EP_STALL BIT(1) #define DWC3_EP_WEDGE BIT(2) #define DWC3_EP_TRANSFER_STARTED BIT(3) +#define DWC3_EP_END_TRANSFER_PENDING BIT(4) #define DWC3_EP_PENDING_REQUEST BIT(5) +#define DWC3_EP_DELAY_START BIT(6) /* This last one is specific to EP0 */ #define DWC3_EP0_DIR_IN BIT(31) diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index e56beb9d1e36..4a13ceaf4093 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -256,86 +256,77 @@ static inline const char *dwc3_ep_event_string(char *str, size_t size, u8 epnum = event->endpoint_number; size_t len; int status; - int ret; - ret = snprintf(str, size, "ep%d%s: ", epnum >> 1, + len = scnprintf(str, size, "ep%d%s: ", epnum >> 1, (epnum & 1) ? 
"in" : "out"); - if (ret < 0) - return "UNKNOWN"; status = event->status; switch (event->endpoint_event) { case DWC3_DEPEVT_XFERCOMPLETE: - len = strlen(str); - snprintf(str + len, size - len, "Transfer Complete (%c%c%c)", + len += scnprintf(str + len, size - len, + "Transfer Complete (%c%c%c)", status & DEPEVT_STATUS_SHORT ? 'S' : 's', status & DEPEVT_STATUS_IOC ? 'I' : 'i', status & DEPEVT_STATUS_LST ? 'L' : 'l'); - len = strlen(str); - if (epnum <= 1) - snprintf(str + len, size - len, " [%s]", + scnprintf(str + len, size - len, " [%s]", dwc3_ep0_state_string(ep0state)); break; case DWC3_DEPEVT_XFERINPROGRESS: - len = strlen(str); - - snprintf(str + len, size - len, "Transfer In Progress [%d] (%c%c%c)", + scnprintf(str + len, size - len, + "Transfer In Progress [%d] (%c%c%c)", event->parameters, status & DEPEVT_STATUS_SHORT ? 'S' : 's', status & DEPEVT_STATUS_IOC ? 'I' : 'i', status & DEPEVT_STATUS_LST ? 'M' : 'm'); break; case DWC3_DEPEVT_XFERNOTREADY: - len = strlen(str); - - snprintf(str + len, size - len, "Transfer Not Ready [%d]%s", + len += scnprintf(str + len, size - len, + "Transfer Not Ready [%d]%s", event->parameters, status & DEPEVT_STATUS_TRANSFER_ACTIVE ? " (Active)" : " (Not Active)"); - len = strlen(str); - /* Control Endpoints */ if (epnum <= 1) { int phase = DEPEVT_STATUS_CONTROL_PHASE(event->status); switch (phase) { case DEPEVT_STATUS_CONTROL_DATA: - snprintf(str + ret, size - ret, + scnprintf(str + len, size - len, " [Data Phase]"); break; case DEPEVT_STATUS_CONTROL_STATUS: - snprintf(str + ret, size - ret, + scnprintf(str + len, size - len, " [Status Phase]"); } } break; case DWC3_DEPEVT_RXTXFIFOEVT: - snprintf(str + ret, size - ret, "FIFO"); + scnprintf(str + len, size - len, "FIFO"); break; case DWC3_DEPEVT_STREAMEVT: status = event->status; switch (status) { case DEPEVT_STREAMEVT_FOUND: - snprintf(str + ret, size - ret, " Stream %d Found", + scnprintf(str + len, size - len, " Stream %d Found", event->parameters); break; case DEPEVT_STREAMEVT_NOTFOUND: default: - snprintf(str + ret, size - ret, " Stream Not Found"); + scnprintf(str + len, size - len, " Stream Not Found"); break; } break; case DWC3_DEPEVT_EPCMDCMPLT: - snprintf(str + ret, size - ret, "Endpoint Command Complete"); + scnprintf(str + len, size - len, "Endpoint Command Complete"); break; default: - snprintf(str, size, "UNKNOWN"); + scnprintf(str + len, size - len, "UNKNOWN"); } return str; diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 294276f7deb9..7051611229c9 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -34,6 +34,7 @@ #define PCI_DEVICE_ID_INTEL_GLK 0x31aa #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e +#define PCI_DEVICE_ID_INTEL_CNPV 0xa3b0 #define PCI_DEVICE_ID_INTEL_ICLLP 0x34ee #define PCI_DEVICE_ID_INTEL_EHLLP 0x4b7e #define PCI_DEVICE_ID_INTEL_TGPLP 0xa0ee @@ -342,6 +343,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPH), (kernel_ulong_t) &dwc3_pci_intel_properties, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPV), + (kernel_ulong_t) &dwc3_pci_intel_properties, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICLLP), (kernel_ulong_t) &dwc3_pci_intel_properties, }, diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index fd1b100d2927..6dee4dabc0a4 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -1136,8 +1136,10 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc, case DWC3_DEPEVT_EPCMDCMPLT: cmd = 
DEPEVT_PARAMETER_CMD(event->parameters); - if (cmd == DWC3_DEPCMD_ENDTRANSFER) + if (cmd == DWC3_DEPCMD_ENDTRANSFER) { + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + } break; } } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 154f3f3e8cff..6ac02ba5e4a1 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1068,7 +1068,14 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep, unsigned int rem = length % maxp; unsigned chain = true; - if (sg_is_last(s)) + /* + * IOMMU driver is coalescing the list of sgs which shares a + * page boundary into one and giving it to USB driver. With + * this the number of sgs mapped is not equal to the number of + * sgs passed. So mark the chain bit to false if it is the last + * mapped sg. + */ + if (i == remaining - 1) chain = false; if (rem && usb_endpoint_dir_out(dep->endpoint.desc) && !chain) { @@ -1447,6 +1454,12 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) list_add_tail(&req->list, &dep->pending_list); req->status = DWC3_REQUEST_STATUS_QUEUED; + /* Start the transfer only after the END_TRANSFER is completed */ + if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) { + dep->flags |= DWC3_EP_DELAY_START; + return 0; + } + /* * NOTICE: Isochronous endpoints should NEVER be prestarted. We must * wait for a XferNotReady event so we will know what's the current @@ -2420,7 +2433,8 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep, if (event->status & DEPEVT_STATUS_SHORT && !chain) return 1; - if (event->status & DEPEVT_STATUS_IOC) + if ((trb->ctrl & DWC3_TRB_CTRL_IOC) || + (trb->ctrl & DWC3_TRB_CTRL_LST)) return 1; return 0; @@ -2625,8 +2639,14 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, cmd = DEPEVT_PARAMETER_CMD(event->parameters); if (cmd == DWC3_DEPCMD_ENDTRANSFER) { + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; dwc3_gadget_ep_cleanup_cancelled_requests(dep); + if ((dep->flags & DWC3_EP_DELAY_START) && + !usb_endpoint_xfer_isoc(dep->endpoint.desc)) + __dwc3_gadget_kick_transfer(dep); + + dep->flags &= ~DWC3_EP_DELAY_START; } break; case DWC3_DEPEVT_STREAMEVT: @@ -2683,7 +2703,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, u32 cmd; int ret; - if (!(dep->flags & DWC3_EP_TRANSFER_STARTED)) + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || + (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; /* @@ -2728,6 +2749,8 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, if (!interrupt) dep->flags &= ~DWC3_EP_TRANSFER_STARTED; + else + dep->flags |= DWC3_EP_END_TRANSFER_PENDING; if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A) udelay(100); diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c index 5567ed2cddbe..fa252870c926 100644 --- a/drivers/usb/dwc3/host.c +++ b/drivers/usb/dwc3/host.c @@ -88,10 +88,10 @@ int dwc3_host_init(struct dwc3 *dwc) memset(props, 0, sizeof(struct property_entry) * ARRAY_SIZE(props)); if (dwc->usb3_lpm_capable) - props[prop_idx++].name = "usb3-lpm-capable"; + props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb3-lpm-capable"); if (dwc->usb2_lpm_disable) - props[prop_idx++].name = "usb2-lpm-disable"; + props[prop_idx++] = PROPERTY_ENTRY_BOOL("usb2-lpm-disable"); /** * WORKAROUND: dwc3 revisions <=3.00a have a limitation @@ -103,7 +103,7 @@ int dwc3_host_init(struct dwc3 *dwc) * This following flag tells XHCI to do just that.
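The dwc3 gadget hunks above implement a small two-flag handshake: a request queued while an End Transfer command is still outstanding only sets DWC3_EP_DELAY_START, and the real kick happens once the EPCMDCMPLT event clears DWC3_EP_END_TRANSFER_PENDING. A stripped-down model of that handshake (stand-alone names, kernel headers assumed; not the driver's code):

#define SK_END_XFER_PENDING     BIT(4)
#define SK_DELAY_START          BIT(6)

/* Queue path: defer the start if an End Transfer is still in flight. */
static bool sketch_queue(unsigned long *flags)
{
        if (*flags & SK_END_XFER_PENDING) {
                *flags |= SK_DELAY_START;
                return false;           /* do not start the transfer yet */
        }
        return true;                    /* safe to kick immediately */
}

/* Completion path: runs when the END_TRANSFER command completes. */
static void sketch_end_xfer_done(unsigned long *flags)
{
        *flags &= ~SK_END_XFER_PENDING;
        if (*flags & SK_DELAY_START) {
                /* ...kick the deferred transfer here... */
                *flags &= ~SK_DELAY_START;
        }
}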
*/ if (dwc->revision <= DWC3_REVISION_300A) - props[prop_idx++].name = "quirk-broken-port-ped"; + props[prop_idx++] = PROPERTY_ENTRY_BOOL("quirk-broken-port-ped"); if (prop_idx) { ret = platform_device_add_properties(xhci, props); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 3b4f67000315..223f72d4d9ed 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -437,12 +437,14 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, val = CONFIG_USB_GADGET_VBUS_DRAW; if (!val) return 0; - switch (speed) { - case USB_SPEED_SUPER: - return DIV_ROUND_UP(val, 8); - default: - return DIV_ROUND_UP(val, 2); - } + if (speed < USB_SPEED_SUPER) + return min(val, 500U) / 2; + else + /* + * USB 3.x supports up to 900mA, but since 900 isn't divisible + * by 8 the integral division will effectively cap to 896mA. + */ + return min(val, 900U) / 8; } static int config_buf(struct usb_configuration *config, @@ -854,6 +856,10 @@ static int set_config(struct usb_composite_dev *cdev, /* when we return, be sure our power usage is valid */ power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + power = min(power, 500U); + else + power = min(power, 900U); done: usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2280,7 +2286,7 @@ void composite_resume(struct usb_gadget *gadget) { struct usb_composite_dev *cdev = get_gadget_data(gadget); struct usb_function *f; - u16 maxpower; + unsigned maxpower; /* REVISIT: should we have config level * suspend/resume callbacks? @@ -2294,10 +2300,14 @@ void composite_resume(struct usb_gadget *gadget) f->resume(f); } - maxpower = cdev->config->MaxPower; + maxpower = cdev->config->MaxPower ? + cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + maxpower = min(maxpower, 500U); + else + maxpower = min(maxpower, 900U); - usb_gadget_vbus_draw(gadget, maxpower ? - maxpower : CONFIG_USB_GADGET_VBUS_DRAW); + usb_gadget_vbus_draw(gadget, maxpower); } cdev->suspended = 0; diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 460d5d7c984f..7f5cf488b2b1 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -52,6 +52,7 @@ struct f_ecm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; /* FIXME is_open needs some irq-ish locking @@ -380,7 +381,7 @@ static void ecm_do_notify(struct f_ecm *ecm) int status; /* notification already in flight? 
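Stepping back to the composite.c hunks further up: bMaxPower is encoded in 2 mA units for USB 2.x, now capped at the spec maximum of 500 mA, and in 8 mA units for USB 3.x, capped at 900 mA; the same caps are applied to the VBUS draw in set_config() and composite_resume(). A worked sketch of the encoding (function name hypothetical):

/* Sketch: encode a configuration's current draw (mA) into bMaxPower. */
static u8 sketch_encode_bMaxPower(bool superspeed, unsigned int ma)
{
        if (!superspeed)
                return min(ma, 500U) / 2;       /* e.g. 500 mA -> 250 */
        /* 900 is not divisible by 8, so this effectively caps at 896 mA */
        return min(ma, 900U) / 8;               /* e.g. 900 mA -> 112 */
}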
*/ - if (!req) + if (atomic_read(&ecm->notify_count)) return; event = req->buf; @@ -420,10 +421,10 @@ static void ecm_do_notify(struct f_ecm *ecm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ecm->ctrl_id); - ecm->notify_req = NULL; + atomic_inc(&ecm->notify_count); status = usb_ep_queue(ecm->notify, req, GFP_ATOMIC); if (status < 0) { - ecm->notify_req = req; + atomic_dec(&ecm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -448,17 +449,19 @@ static void ecm_notify_complete(struct usb_ep *ep, struct usb_request *req) switch (req->status) { case 0: /* no fault */ + atomic_dec(&ecm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ecm->notify_count, 0); ecm->notify_state = ECM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ecm->notify_count); break; } - ecm->notify_req = req; ecm_do_notify(ecm); } @@ -907,6 +910,11 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) usb_free_all_descriptors(f); + if (atomic_read(&ecm->notify_count)) { + usb_ep_dequeue(ecm->notify, ecm->notify_req); + atomic_set(&ecm->notify_count, 0); + } + kfree(ecm->notify_req->buf); usb_ep_free_request(ecm->notify, ecm->notify_req); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 0bbccac94d6c..bdac92d3a8d0 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1062,6 +1062,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) req->num_sgs = io_data->sgt.nents; } else { req->buf = data; + req->num_sgs = 0; } req->length = data_len; @@ -1105,6 +1106,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) req->num_sgs = io_data->sgt.nents; } else { req->buf = data; + req->num_sgs = 0; } req->length = data_len; @@ -1160,18 +1162,19 @@ static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + unsigned long flags; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); + spin_lock_irqsave(&epfile->ffs->eps_lock, flags); if (likely(io_data && io_data->ep && io_data->req)) value = usb_ep_dequeue(io_data->ep, io_data->req); else value = -EINVAL; - spin_unlock_irq(&epfile->ffs->eps_lock); + spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags); return value; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 2d6e76e4cffa..1d900081b1f0 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -53,6 +53,7 @@ struct f_ncm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; const struct ndp_parser_opts *parser_opts; @@ -547,7 +548,7 @@ static void ncm_do_notify(struct f_ncm *ncm) int status; /* notification already in flight? 
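Both the f_ecm hunk above and the matching f_ncm hunk here answer the "already in flight?" question with an atomic counter instead of the old "notify_req == NULL means busy" convention, which also lets unbind dequeue a still-pending notification. A stripped-down model of the pattern (stand-alone names, not the drivers'):

static atomic_t sketch_notify_count = ATOMIC_INIT(0);

static void sketch_do_notify(void)
{
        if (atomic_read(&sketch_notify_count))
                return;                 /* a notification is already queued */
        atomic_inc(&sketch_notify_count);
        /* queue the request; on usb_ep_queue() failure, atomic_dec() again */
}

static void sketch_notify_complete(int status)
{
        if (status == -ECONNRESET || status == -ESHUTDOWN)
                atomic_set(&sketch_notify_count, 0);    /* endpoint flushed */
        else
                atomic_dec(&sketch_notify_count);
}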
*/ - if (!req) + if (atomic_read(&ncm->notify_count)) return; event = req->buf; @@ -587,7 +588,8 @@ static void ncm_do_notify(struct f_ncm *ncm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ncm->ctrl_id); - ncm->notify_req = NULL; + atomic_inc(&ncm->notify_count); + /* * In double buffering if there is a space in FIFO, * completion callback can be called right after the call, @@ -597,7 +599,7 @@ static void ncm_do_notify(struct f_ncm *ncm) status = usb_ep_queue(ncm->notify, req, GFP_ATOMIC); spin_lock(&ncm->lock); if (status < 0) { - ncm->notify_req = req; + atomic_dec(&ncm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -632,17 +634,19 @@ static void ncm_notify_complete(struct usb_ep *ep, struct usb_request *req) case 0: VDBG(cdev, "Notification %02x sent\n", event->bNotificationType); + atomic_dec(&ncm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ncm->notify_count, 0); ncm->notify_state = NCM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ncm->notify_count); break; } - ncm->notify_req = req; ncm_do_notify(ncm); spin_unlock(&ncm->lock); } @@ -1649,6 +1653,11 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); + if (atomic_read(&ncm->notify_count)) { + usb_ep_dequeue(ncm->notify, ncm->notify_req); + atomic_set(&ncm->notify_count, 0); + } + kfree(ncm->notify_req->buf); usb_ep_free_request(ncm->notify, ncm->notify_req); } diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index f986e5c55974..8167d379e115 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -561,8 +561,10 @@ static int gs_start_io(struct gs_port *port) port->n_read = 0; started = gs_start_rx(port); - /* unblock any pending writes into our circular buffer */ if (started) { + gs_start_tx(port); + /* Unblock any pending writes into our circular buffer, in case + * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { gs_free_requests(ep, head, &port->read_allocated); diff --git a/drivers/usb/gadget/legacy/cdc2.c b/drivers/usb/gadget/legacy/cdc2.c index da1c37933ca1..8d7a556ece30 100644 --- a/drivers/usb/gadget/legacy/cdc2.c +++ b/drivers/usb/gadget/legacy/cdc2.c @@ -225,7 +225,7 @@ static struct usb_composite_driver cdc_driver = { .name = "g_cdc", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = cdc_bind, .unbind = cdc_unbind, }; diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c index b640ed3fcf70..ae6d8f7092b8 100644 --- a/drivers/usb/gadget/legacy/g_ffs.c +++ b/drivers/usb/gadget/legacy/g_ffs.c @@ -149,7 +149,7 @@ static struct usb_composite_driver gfs_driver = { .name = DRIVER_NAME, .dev = &gfs_dev_desc, .strings = gfs_dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gfs_bind, .unbind = gfs_unbind, }; diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c index 50515f9e1022..ec9749845660 100644 --- a/drivers/usb/gadget/legacy/multi.c +++ b/drivers/usb/gadget/legacy/multi.c @@ -482,7 +482,7 @@ static struct usb_composite_driver multi_driver = { .name = "g_multi", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = multi_bind, .unbind = multi_unbind, .needs_serial = 1, diff --git 
a/drivers/usb/gadget/legacy/ncm.c b/drivers/usb/gadget/legacy/ncm.c index 8465f081e921..c61e71ba7045 100644 --- a/drivers/usb/gadget/legacy/ncm.c +++ b/drivers/usb/gadget/legacy/ncm.c @@ -197,7 +197,7 @@ static struct usb_composite_driver ncm_driver = { .name = "g_ncm", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gncm_bind, .unbind = gncm_unbind, }; diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 64d80c65bb96..aaf975c809bf 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -2175,8 +2175,6 @@ static int gr_probe(struct platform_device *pdev) return -ENOMEM; } - spin_lock(&dev->lock); - /* Inside lock so that no gadget can use this udc until probe is done */ retval = usb_add_gadget_udc(dev->dev, &dev->gadget); if (retval) { @@ -2185,15 +2183,21 @@ static int gr_probe(struct platform_device *pdev) } dev->added = 1; + spin_lock(&dev->lock); + retval = gr_udc_init(dev); - if (retval) + if (retval) { + spin_unlock(&dev->lock); goto out; - - gr_dfs_create(dev); + } /* Clear all interrupt enables that might be left on since last boot */ gr_disable_interrupts_and_pullup(dev); + spin_unlock(&dev->lock); + + gr_dfs_create(dev); + retval = gr_request_irq(dev, dev->irq); if (retval) { dev_err(dev->dev, "Failed to request irq %d\n", dev->irq); @@ -2222,8 +2226,6 @@ static int gr_probe(struct platform_device *pdev) dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq); out: - spin_unlock(&dev->lock); - if (retval) gr_remove(pdev); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 7a3a29e5e9d2..af92b2576fe9 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -55,6 +55,7 @@ static u8 usb_bos_descriptor [] = { static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, u16 wLength) { + struct xhci_port_cap *port_cap = NULL; int i, ssa_count; u32 temp; u16 desc_size, ssp_cap_size, ssa_size = 0; @@ -64,16 +65,24 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, ssp_cap_size = sizeof(usb_bos_descriptor) - desc_size; /* does xhci support USB 3.1 Enhanced SuperSpeed */ - if (xhci->usb3_rhub.min_rev >= 0x01) { + for (i = 0; i < xhci->num_port_caps; i++) { + if (xhci->port_caps[i].maj_rev == 0x03 && + xhci->port_caps[i].min_rev >= 0x01) { + usb3_1 = true; + port_cap = &xhci->port_caps[i]; + break; + } + } + + if (usb3_1) { /* does xhci provide a PSI table for SSA speed attributes? 
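For reference while reading the xhci hunks that follow: each cached PSI dword packs a protocol speed ID together with a mantissa/exponent bit-rate pair. An illustrative decode (bit layout per the xHCI extended-capability registers, mirroring the XHCI_EXT_PORT_* macros used below; not driver code):

static void sketch_decode_psi(u32 psi)
{
        u32 psiv = psi & 0x0f;                  /* protocol speed ID value */
        u32 psie = (psi >> 4) & 0x03;           /* bit-rate exponent */
        u32 psim = (psi >> 16) & 0xffff;        /* bit-rate mantissa */

        pr_info("PSIV:%u PSIE:%u PSIM:%u\n", psiv, psie, psim);
}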
*/ - if (xhci->usb3_rhub.psi_count) { + if (port_cap->psi_count) { /* two SSA entries for each unique PSI ID, RX and TX */ - ssa_count = xhci->usb3_rhub.psi_uid_count * 2; + ssa_count = port_cap->psi_uid_count * 2; ssa_size = ssa_count * sizeof(u32); ssp_cap_size -= 16; /* skip copying the default SSA */ } desc_size += ssp_cap_size; - usb3_1 = true; } memcpy(buf, &usb_bos_descriptor, min(desc_size, wLength)); @@ -99,7 +108,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, } /* If PSI table exists, add the custom speed attributes from it */ - if (usb3_1 && xhci->usb3_rhub.psi_count) { + if (usb3_1 && port_cap->psi_count) { u32 ssp_cap_base, bm_attrib, psi, psi_mant, psi_exp; int offset; @@ -111,7 +120,7 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, /* attribute count SSAC bits 4:0 and ID count SSIC bits 8:5 */ bm_attrib = (ssa_count - 1) & 0x1f; - bm_attrib |= (xhci->usb3_rhub.psi_uid_count - 1) << 5; + bm_attrib |= (port_cap->psi_uid_count - 1) << 5; put_unaligned_le32(bm_attrib, &buf[ssp_cap_base + 4]); if (wLength < desc_size + ssa_size) @@ -124,8 +133,8 @@ static int xhci_create_usb3_bos_desc(struct xhci_hcd *xhci, char *buf, * USB 3.1 requires two SSA entries (RX and TX) for every link */ offset = desc_size; - for (i = 0; i < xhci->usb3_rhub.psi_count; i++) { - psi = xhci->usb3_rhub.psi[i]; + for (i = 0; i < port_cap->psi_count; i++) { + psi = port_cap->psi[i]; psi &= ~USB_SSP_SUBLINK_SPEED_RSVD; psi_exp = XHCI_EXT_PORT_PSIE(psi); psi_mant = XHCI_EXT_PORT_PSIM(psi); diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 3b1388fa2f36..884c601bfa15 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1475,9 +1475,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Allow 3 retries for everything but isoc, set CErr = 3 */ if (!usb_endpoint_xfer_isoc(&ep->desc)) err_count = 3; - /* Some devices get this wrong */ - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH) - max_packet = 512; + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */ + if (usb_endpoint_xfer_bulk(&ep->desc)) { + if (udev->speed == USB_SPEED_HIGH) + max_packet = 512; + if (udev->speed == USB_SPEED_FULL) { + max_packet = rounddown_pow_of_two(max_packet); + max_packet = clamp_val(max_packet, 8, 64); + } + } /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */ if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) avg_trb_len = 8; @@ -1909,17 +1915,17 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) xhci->usb3_rhub.num_ports = 0; xhci->num_active_eps = 0; kfree(xhci->usb2_rhub.ports); - kfree(xhci->usb2_rhub.psi); kfree(xhci->usb3_rhub.ports); - kfree(xhci->usb3_rhub.psi); kfree(xhci->hw_ports); kfree(xhci->rh_bw); kfree(xhci->ext_caps); + for (i = 0; i < xhci->num_port_caps; i++) + kfree(xhci->port_caps[i].psi); + kfree(xhci->port_caps); + xhci->num_port_caps = 0; xhci->usb2_rhub.ports = NULL; - xhci->usb2_rhub.psi = NULL; xhci->usb3_rhub.ports = NULL; - xhci->usb3_rhub.psi = NULL; xhci->hw_ports = NULL; xhci->rh_bw = NULL; xhci->ext_caps = NULL; @@ -2120,6 +2126,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, u8 major_revision, minor_revision; struct xhci_hub *rhub; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; + struct xhci_port_cap *port_cap; temp = readl(addr); major_revision = XHCI_EXT_PORT_MAJOR(temp); @@ -2154,31 +2161,39 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, 
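Also worth spelling out from the xhci-mem.c endpoint hunk above: full-speed bulk endpoints only support max packet sizes of 8, 16, 32 or 64 bytes, so a bogus descriptor value is rounded down to a power of two and clamped into that range. Worked through for a descriptor claiming 1023 bytes: rounddown_pow_of_two(1023) gives 512, and the clamp yields 64. A sketch of the sanitization (function name hypothetical):

static unsigned int sketch_fs_bulk_maxp(unsigned int max_packet)
{
        max_packet = rounddown_pow_of_two(max_packet);  /* 1023 -> 512 */
        return clamp_val(max_packet, 8, 64);            /* 512 -> 64 */
}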
/* WTF? "Valid values are '1' to MaxPorts" */ return; - rhub->psi_count = XHCI_EXT_PORT_PSIC(temp); - if (rhub->psi_count) { - rhub->psi = kcalloc_node(rhub->psi_count, sizeof(*rhub->psi), - GFP_KERNEL, dev_to_node(dev)); - if (!rhub->psi) - rhub->psi_count = 0; + port_cap = &xhci->port_caps[xhci->num_port_caps++]; + if (xhci->num_port_caps > max_caps) + return; + + port_cap->maj_rev = major_revision; + port_cap->min_rev = minor_revision; + port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp); - rhub->psi_uid_count++; - for (i = 0; i < rhub->psi_count; i++) { - rhub->psi[i] = readl(addr + 4 + i); + if (port_cap->psi_count) { + port_cap->psi = kcalloc_node(port_cap->psi_count, + sizeof(*port_cap->psi), + GFP_KERNEL, dev_to_node(dev)); + if (!port_cap->psi) + port_cap->psi_count = 0; + + port_cap->psi_uid_count++; + for (i = 0; i < port_cap->psi_count; i++) { + port_cap->psi[i] = readl(addr + 4 + i); /* count unique ID values, two consecutive entries can * have the same ID if link is asymmetric */ - if (i && (XHCI_EXT_PORT_PSIV(rhub->psi[i]) != - XHCI_EXT_PORT_PSIV(rhub->psi[i - 1]))) - rhub->psi_uid_count++; + if (i && (XHCI_EXT_PORT_PSIV(port_cap->psi[i]) != + XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1]))) + port_cap->psi_uid_count++; xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n", - XHCI_EXT_PORT_PSIV(rhub->psi[i]), - XHCI_EXT_PORT_PSIE(rhub->psi[i]), - XHCI_EXT_PORT_PLT(rhub->psi[i]), - XHCI_EXT_PORT_PFD(rhub->psi[i]), - XHCI_EXT_PORT_LP(rhub->psi[i]), - XHCI_EXT_PORT_PSIM(rhub->psi[i])); + XHCI_EXT_PORT_PSIV(port_cap->psi[i]), + XHCI_EXT_PORT_PSIE(port_cap->psi[i]), + XHCI_EXT_PORT_PLT(port_cap->psi[i]), + XHCI_EXT_PORT_PFD(port_cap->psi[i]), + XHCI_EXT_PORT_LP(port_cap->psi[i]), + XHCI_EXT_PORT_PSIM(port_cap->psi[i])); } } /* cache usb2 port capabilities */ @@ -2213,6 +2228,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, continue; } hw_port->rhub = rhub; + hw_port->port_cap = port_cap; rhub->num_ports++; } /* FIXME: Should we disable ports not in the Extended Capabilities?
*/ @@ -2303,6 +2319,11 @@ static int xhci_setup_port_arrays(struct xhci_hcd *xhci, gfp_t flags) if (!xhci->ext_caps) return -ENOMEM; + xhci->port_caps = kcalloc_node(cap_count, sizeof(*xhci->port_caps), + flags, dev_to_node(dev)); + if (!xhci->port_caps) + return -ENOMEM; + offset = cap_start; while (offset) { diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 4917c5b033fa..1fddc41fa1f3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -49,6 +49,7 @@ #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_XHCI 0x15ec #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI 0x15f0 #define PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI 0x8a13 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -135,7 +136,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_AMD_PLL_FIX; if (pdev->vendor == PCI_VENDOR_ID_AMD && - (pdev->device == 0x15e0 || + (pdev->device == 0x145c || + pdev->device == 0x15e0 || pdev->device == 0x15e1 || pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; @@ -187,7 +189,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -302,6 +305,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(pdev); + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn); /* Find any debug ports */ @@ -359,9 +365,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; - if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_acpi_rtd3_enable(dev); - /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index d90cd5ec09cf..315b4552693c 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -445,6 +445,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = &xhci_plat_pm_ops, diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index bf9065438320..7b0d175ad708 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1432,6 +1432,7 @@ MODULE_FIRMWARE("nvidia/tegra210/xusb.bin"); static const char * const tegra186_supply_names[] = { }; +MODULE_FIRMWARE("nvidia/tegra186/xusb.bin"); static const struct tegra_xusb_phy_type tegra186_phy_types[] = { { .name = "usb3", .num = 3, }, diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 56eb867803a6..b19582b2a72c 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -289,23 +289,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb, ), TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x", __entry->epnum, __entry->dir_in ? 
"in" : "out", - ({ char *s; - switch (__entry->type) { - case USB_ENDPOINT_XFER_INT: - s = "intr"; - break; - case USB_ENDPOINT_XFER_CONTROL: - s = "control"; - break; - case USB_ENDPOINT_XFER_BULK: - s = "bulk"; - break; - case USB_ENDPOINT_XFER_ISOC: - s = "isoc"; - break; - default: - s = "UNKNOWN"; - } s; }), __entry->urb, __entry->pipe, __entry->slot_id, + __print_symbolic(__entry->type, + { USB_ENDPOINT_XFER_INT, "intr" }, + { USB_ENDPOINT_XFER_CONTROL, "control" }, + { USB_ENDPOINT_XFER_BULK, "bulk" }, + { USB_ENDPOINT_XFER_ISOC, "isoc" }), + __entry->urb, __entry->pipe, __entry->slot_id, __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream, __entry->flags ) diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 13d8838cd552..3ecee10fdcdc 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1702,12 +1702,20 @@ struct xhci_bus_state { * Intel Lynx Point LP xHCI host. */ #define XHCI_MAX_REXIT_TIMEOUT_MS 20 +struct xhci_port_cap { + u32 *psi; /* array of protocol speed ID entries */ + u8 psi_count; + u8 psi_uid_count; + u8 maj_rev; + u8 min_rev; +}; struct xhci_port { __le32 __iomem *addr; int hw_portnum; int hcd_portnum; struct xhci_hub *rhub; + struct xhci_port_cap *port_cap; }; struct xhci_hub { @@ -1719,9 +1727,6 @@ struct xhci_hub { /* supported prococol extended capabiliy values */ u8 maj_rev; u8 min_rev; - u32 *psi; /* array of protocol speed ID entries */ - u8 psi_count; - u8 psi_uid_count; }; /* There is one xhci_hcd structure per controller */ @@ -1880,6 +1885,9 @@ struct xhci_hcd { /* cached usb2 extened protocol capabilites */ u32 *ext_caps; unsigned int num_ext_caps; + /* cached extended protocol port capabilities */ + struct xhci_port_cap *port_caps; + unsigned int num_port_caps; /* Compliance Mode Recovery Data */ struct timer_list comp_mode_recovery_timer; u32 port_status_u0; diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index dce44fbf031f..dce20301e367 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -33,6 +33,14 @@ #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* fuller speed iowarrior */ +#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 +#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 +#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 + +/* OEMed devices */ +#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a +#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS @@ -133,6 +141,11 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -357,6 +370,7 @@ static ssize_t iowarrior_write(struct file *file, } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: + case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case 
USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: @@ -371,6 +385,10 @@ static ssize_t iowarrior_write(struct file *file, goto exit; break; case USB_DEVICE_ID_CODEMERCS_IOW56: + case USB_DEVICE_ID_CODEMERCS_IOW56AM: + case USB_DEVICE_ID_CODEMERCS_IOW28: + case USB_DEVICE_ID_CODEMERCS_IOW28L: + case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -493,6 +511,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { @@ -767,7 +786,11 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } - if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -780,7 +803,11 @@ static int iowarrior_probe(struct usb_interface *interface, /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56)) + ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 10c9e7f6273e..29fe5771c21b 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -424,10 +424,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, return err; } - hub->vdd = devm_regulator_get(dev, "vdd"); - if (IS_ERR(hub->vdd)) - return PTR_ERR(hub->vdd); - if (of_property_read_u16_array(np, "vendor-id", &hub->vendor_id, 1)) hub->vendor_id = USB251XB_DEF_VENDOR_ID; @@ -640,6 +636,13 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, } #endif /* CONFIG_OF */ +static void usb251xb_regulator_disable_action(void *data) +{ + struct usb251xb *hub = data; + + regulator_disable(hub->vdd); +} + static int usb251xb_probe(struct usb251xb *hub) { struct device *dev = hub->dev; @@ -676,10 +679,19 @@ static int usb251xb_probe(struct usb251xb *hub) if (err) return err; + hub->vdd = devm_regulator_get(dev, "vdd"); + if (IS_ERR(hub->vdd)) + return PTR_ERR(hub->vdd); + err = regulator_enable(hub->vdd); if (err) return err; + err = devm_add_action_or_reset(dev, + usb251xb_regulator_disable_action, hub); + if (err) + return err; + err = usb251xb_connect(hub); if (err) { dev_err(dev, "Failed to connect hub (%d)\n", err); diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index a3d2fef67746..5c93226e0e20 100644 --- a/drivers/usb/musb/omap2430.c +++ 
b/drivers/usb/musb/omap2430.c @@ -361,8 +361,6 @@ static const struct musb_platform_ops omap2430_ops = { .init = omap2430_musb_init, .exit = omap2430_musb_exit, - .set_vbus = omap2430_musb_set_vbus, - .enable = omap2430_musb_enable, .disable = omap2430_musb_disable, diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index d3f420f3a083..c5ecdcd51ffc 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -205,6 +205,16 @@ static int ch341_get_divisor(speed_t speed) 16 * speed - 16 * CH341_CLKRATE / (clk_div * (div + 1))) div++; + /* + * Prefer lower base clock (fact = 0) if even divisor. + * + * Note that this makes the receiver more tolerant to errors. + */ + if (fact == 1 && div % 2 == 0) { + div /= 2; + fact = 0; + } + return (0x100 - div) << 8 | fact << 2 | ps; } diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 302eb9530859..627bea7e6cfb 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -45,9 +45,10 @@ static int buffer_size; static int xbof = -1; static int ir_startup (struct usb_serial *serial); -static int ir_open(struct tty_struct *tty, struct usb_serial_port *port); -static int ir_prepare_write_buffer(struct usb_serial_port *port, - void *dest, size_t size); +static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, + const unsigned char *buf, int count); +static int ir_write_room(struct tty_struct *tty); +static void ir_write_bulk_callback(struct urb *urb); static void ir_process_read_urb(struct urb *urb); static void ir_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios); @@ -77,8 +78,9 @@ static struct usb_serial_driver ir_device = { .num_ports = 1, .set_termios = ir_set_termios, .attach = ir_startup, - .open = ir_open, - .prepare_write_buffer = ir_prepare_write_buffer, + .write = ir_write, + .write_room = ir_write_room, + .write_bulk_callback = ir_write_bulk_callback, .process_read_urb = ir_process_read_urb, }; @@ -195,6 +197,9 @@ static int ir_startup(struct usb_serial *serial) struct usb_irda_cs_descriptor *irda_desc; int rates; + if (serial->num_bulk_in < 1 || serial->num_bulk_out < 1) + return -ENODEV; + irda_desc = irda_usb_find_class_desc(serial, 0); if (!irda_desc) { dev_err(&serial->dev->dev, @@ -251,35 +256,102 @@ static int ir_startup(struct usb_serial *serial) return 0; } -static int ir_open(struct tty_struct *tty, struct usb_serial_port *port) +static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, + const unsigned char *buf, int count) { - int i; + struct urb *urb = NULL; + unsigned long flags; + int ret; - for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) - port->write_urbs[i]->transfer_flags = URB_ZERO_PACKET; + if (port->bulk_out_size == 0) + return -EINVAL; - /* Start reading from the device */ - return usb_serial_generic_open(tty, port); -} + if (count == 0) + return 0; -static int ir_prepare_write_buffer(struct usb_serial_port *port, - void *dest, size_t size) -{ - unsigned char *buf = dest; - int count; + count = min(count, port->bulk_out_size - 1); + + spin_lock_irqsave(&port->lock, flags); + if (__test_and_clear_bit(0, &port->write_urbs_free)) { + urb = port->write_urbs[0]; + port->tx_bytes += count; + } + spin_unlock_irqrestore(&port->lock, flags); + + if (!urb) + return 0; /* * The first byte of the packet we send to the device contains an - * inbound header which indicates an additional number of BOFs and + * outbound header which indicates an additional number of BOFs and * a baud 
rate change. * * See section 5.4.2.2 of the USB IrDA spec. */ - *buf = ir_xbof | ir_baud; + *(u8 *)urb->transfer_buffer = ir_xbof | ir_baud; + + memcpy(urb->transfer_buffer + 1, buf, count); + + urb->transfer_buffer_length = count + 1; + urb->transfer_flags = URB_ZERO_PACKET; + + ret = usb_submit_urb(urb, GFP_ATOMIC); + if (ret) { + dev_err(&port->dev, "failed to submit write urb: %d\n", ret); + + spin_lock_irqsave(&port->lock, flags); + __set_bit(0, &port->write_urbs_free); + port->tx_bytes -= count; + spin_unlock_irqrestore(&port->lock, flags); + + return ret; + } + + return count; +} + +static void ir_write_bulk_callback(struct urb *urb) +{ + struct usb_serial_port *port = urb->context; + int status = urb->status; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + __set_bit(0, &port->write_urbs_free); + port->tx_bytes -= urb->transfer_buffer_length - 1; + spin_unlock_irqrestore(&port->lock, flags); + + switch (status) { + case 0: + break; + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: + dev_dbg(&port->dev, "write urb stopped: %d\n", status); + return; + case -EPIPE: + dev_err(&port->dev, "write urb stopped: %d\n", status); + return; + default: + dev_err(&port->dev, "nonzero write-urb status: %d\n", status); + break; + } + + usb_serial_port_softint(port); +} + +static int ir_write_room(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + int count = 0; + + if (port->bulk_out_size == 0) + return 0; + + if (test_bit(0, &port->write_urbs_free)) + count = port->bulk_out_size - 1; - count = kfifo_out_locked(&port->write_fifo, buf + 1, size - 1, - &port->lock); - return count + 1; + return count; } static void ir_process_read_urb(struct urb *urb) @@ -332,34 +404,34 @@ static void ir_set_termios(struct tty_struct *tty, switch (baud) { case 2400: - ir_baud = USB_IRDA_BR_2400; + ir_baud = USB_IRDA_LS_2400; break; case 9600: - ir_baud = USB_IRDA_BR_9600; + ir_baud = USB_IRDA_LS_9600; break; case 19200: - ir_baud = USB_IRDA_BR_19200; + ir_baud = USB_IRDA_LS_19200; break; case 38400: - ir_baud = USB_IRDA_BR_38400; + ir_baud = USB_IRDA_LS_38400; break; case 57600: - ir_baud = USB_IRDA_BR_57600; + ir_baud = USB_IRDA_LS_57600; break; case 115200: - ir_baud = USB_IRDA_BR_115200; + ir_baud = USB_IRDA_LS_115200; break; case 576000: - ir_baud = USB_IRDA_BR_576000; + ir_baud = USB_IRDA_LS_576000; break; case 1152000: - ir_baud = USB_IRDA_BR_1152000; + ir_baud = USB_IRDA_LS_1152000; break; case 4000000: - ir_baud = USB_IRDA_BR_4000000; + ir_baud = USB_IRDA_LS_4000000; break; default: - ir_baud = USB_IRDA_BR_9600; + ir_baud = USB_IRDA_LS_9600; baud = 9600; } diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 084cc2fff3ae..0b5dcf973d94 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1183,6 +1183,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index aab737e1e7b6..c5a2995dfa2e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = 
{ { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index a019ea7e6e0e..52db5519aaf0 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -130,6 +130,7 @@ #define HP_LM920_PRODUCT_ID 0x026b #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 +#define HP_LD381_PRODUCT_ID 0x0f7f #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 95bba3ba6ac6..3670fda02c34 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -45,6 +45,7 @@ struct uas_dev_info { struct scsi_cmnd *cmnd[MAX_CMNDS]; spinlock_t lock; struct work_struct work; + struct work_struct scan_work; /* for async scanning */ }; enum { @@ -114,6 +115,17 @@ static void uas_do_work(struct work_struct *work) spin_unlock_irqrestore(&devinfo->lock, flags); } +static void uas_scan_work(struct work_struct *work) +{ + struct uas_dev_info *devinfo = + container_of(work, struct uas_dev_info, scan_work); + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf); + + dev_dbg(&devinfo->intf->dev, "starting scan\n"); + scsi_scan_host(shost); + dev_dbg(&devinfo->intf->dev, "scan complete\n"); +} + static void uas_add_work(struct uas_cmd_info *cmdinfo) { struct scsi_pointer *scp = (void *)cmdinfo; @@ -982,6 +994,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) init_usb_anchor(&devinfo->data_urbs); spin_lock_init(&devinfo->lock); INIT_WORK(&devinfo->work, uas_do_work); + INIT_WORK(&devinfo->scan_work, uas_scan_work); result = uas_configure_endpoints(devinfo); if (result) @@ -998,7 +1011,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto free_streams; - scsi_scan_host(shost); + /* Submit the delayed_work for SCSI-device scanning */ + schedule_work(&devinfo->scan_work); + return result; free_streams: @@ -1166,6 +1181,12 @@ static void uas_disconnect(struct usb_interface *intf) usb_kill_anchored_urbs(&devinfo->data_urbs); uas_zap_pending(devinfo, DID_NO_CONNECT); + /* + * Prevent SCSI scanning (if it hasn't started yet) + * or wait for the SCSI-scanning routine to stop. 
+ */ + cancel_work_sync(&devinfo->scan_work); + scsi_remove_host(shost); uas_free_streams(devinfo); scsi_host_put(shost); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 1cd9b6305b06..1880f3e13f57 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1258,6 +1258,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, + "Samsung", + "Flash Drive FIT", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index ed8655c6af8c..b498960ff72b 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -1666,7 +1666,7 @@ static const struct property_entry port_props[] = { PROPERTY_ENTRY_STRING("try-power-role", "sink"), PROPERTY_ENTRY_U32_ARRAY("source-pdos", src_pdo), PROPERTY_ENTRY_U32_ARRAY("sink-pdos", snk_pdo), - PROPERTY_ENTRY_U32("op-sink-microwatt", 2500), + PROPERTY_ENTRY_U32("op-sink-microwatt", 2500000), { } }; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 8b4ff9fff340..753645bb2527 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -591,6 +591,12 @@ static int tcpci_probe(struct i2c_client *client, static int tcpci_remove(struct i2c_client *client) { struct tcpci_chip *chip = i2c_get_clientdata(client); + int err; + + /* Disable chip interrupts before unregistering port */ + err = tcpci_write16(chip->tcpci, TCPC_ALERT_MASK, 0); + if (err < 0) + return err; tcpci_unregister_port(chip->tcpci); diff --git a/drivers/usb/typec/tcpm/wcove.c b/drivers/usb/typec/tcpm/wcove.c index edc271da14f4..9b745f432c91 100644 --- a/drivers/usb/typec/tcpm/wcove.c +++ b/drivers/usb/typec/tcpm/wcove.c @@ -597,7 +597,7 @@ static const struct property_entry wcove_props[] = { PROPERTY_ENTRY_STRING("try-power-role", "sink"), PROPERTY_ENTRY_U32_ARRAY("source-pdos", src_pdo), PROPERTY_ENTRY_U32_ARRAY("sink-pdos", snk_pdo), - PROPERTY_ENTRY_U32("op-sink-microwatt", 15000), + PROPERTY_ENTRY_U32("op-sink-microwatt", 15000000), { } }; diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index d4d5189edfb8..77655d9ce549 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -271,6 +271,9 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt) return; dp = typec_altmode_get_drvdata(alt); + if (!dp) + return; + dp->data.conf = 0; dp->data.status = 0; dp->initialized = false; @@ -285,6 +288,8 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, struct typec_altmode *alt; struct ucsi_dp *dp; + mutex_lock(&con->lock); + /* We can't rely on the firmware with the capabilities. 
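Stepping back to the uas hunks above: they are an instance of a common driver pattern in which a slow operation (here scsi_scan_host()) is moved out of probe() into a work item, and disconnect() calls cancel_work_sync() so the scan either never starts or is guaranteed finished before teardown continues. A minimal sketch of the pattern, with hypothetical my_* names rather than the actual uas code:

    #include <linux/workqueue.h>

    /* Sketch only: my_dev and my_slow_scan() are hypothetical stand-ins. */
    struct my_dev {
    	struct work_struct scan_work;
    };

    static void my_slow_scan(struct my_dev *d) { /* e.g. a device scan */ }

    static void my_scan_work(struct work_struct *work)
    {
    	struct my_dev *d = container_of(work, struct my_dev, scan_work);

    	my_slow_scan(d);		/* process context, may sleep */
    }

    static int my_probe(struct my_dev *d)
    {
    	INIT_WORK(&d->scan_work, my_scan_work);
    	schedule_work(&d->scan_work);	/* don't block probe on the scan */
    	return 0;
    }

    static void my_disconnect(struct my_dev *d)
    {
    	/* either prevents the scan from starting or waits for it to end */
    	cancel_work_sync(&d->scan_work);
    }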
*/ desc->vdo |= DP_CAP_DP_SIGNALING | DP_CAP_RECEPTACLE; @@ -293,12 +298,15 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, desc->vdo |= all_assignments << 16; alt = typec_port_register_altmode(con->port, desc); - if (IS_ERR(alt)) + if (IS_ERR(alt)) { + mutex_unlock(&con->lock); return alt; + } dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL); if (!dp) { typec_unregister_altmode(alt); + mutex_unlock(&con->lock); return ERR_PTR(-ENOMEM); } @@ -311,5 +319,7 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, alt->ops = &ucsi_displayport_ops; typec_altmode_set_drvdata(alt, dp); + mutex_unlock(&con->lock); + return alt; } diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c index f2983f0f84be..3f5f8198a6bb 100644 --- a/drivers/vfio/pci/vfio_pci_nvlink2.c +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c @@ -97,8 +97,10 @@ static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev, /* If there were any mappings at all... */ if (data->mm) { - ret = mm_iommu_put(data->mm, data->mem); - WARN_ON(ret); + if (data->mem) { + ret = mm_iommu_put(data->mm, data->mem); + WARN_ON(ret); + } mmdrop(data->mm); } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e158159671fa..18e205eeb9af 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1414,10 +1414,6 @@ static int vhost_net_release(struct inode *inode, struct file *f) static struct socket *get_raw_socket(int fd) { - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; int r; struct socket *sock = sockfd_lookup(fd, &r); @@ -1430,11 +1426,7 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, 0); - if (r < 0) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { + if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } diff --git a/drivers/video/backlight/qcom-wled.c b/drivers/video/backlight/qcom-wled.c index d46052d8ff41..3d276b30a78c 100644 --- a/drivers/video/backlight/qcom-wled.c +++ b/drivers/video/backlight/qcom-wled.c @@ -956,8 +956,8 @@ static int wled_configure(struct wled *wled, int version) struct wled_config *cfg = &wled->cfg; struct device *dev = wled->dev; const __be32 *prop_addr; - u32 size, val, c, string_len; - int rc, i, j; + u32 size, val, c; + int rc, i, j, string_len; const struct wled_u32_opts *u32_opts = NULL; const struct wled_u32_opts wled3_opts[] = { diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index de7b8382aba9..998b0de1812f 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1316,6 +1316,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) static int vgacon_resize(struct vc_data *c, unsigned int width, unsigned int height, unsigned int user) { + if ((width << 1) * height > vga_vram_size) + return -EINVAL; + if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index aa9541bf964b..f65991a67af2 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -2215,6 +2215,7 @@ config FB_HYPERV select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT select FB_DEFERRED_IO + select DMA_CMA if HAVE_DMA_CONTIGUOUS && CMA help This framebuffer driver supports Microsoft Hyper-V Synthetic Video. 
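The vgacon_resize() hunk above adds a size check ahead of the existing geometry checks: a text console needs two bytes per character cell (glyph plus attribute), so a width x height screen must fit into 2 * width * height bytes of VGA memory. For example, 80x25 needs 4,000 bytes, while an absurd 8192x8192 request would need 128 MiB and is now rejected. A standalone sketch of the same validation; it widens to size_t before multiplying, an extra precaution not spelled out in the patch, and the 32 KiB window size is illustrative:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Sketch: reject a text-mode resize that exceeds available VGA memory. */
    static bool vga_resize_fits(unsigned int width, unsigned int height,
    			    size_t vram_size)
    {
    	/* 2 bytes per character cell: glyph + attribute */
    	return (size_t)width * height * 2 <= vram_size;
    }

    int main(void)
    {
    	size_t vram = 32768;	/* illustrative text-mode window */

    	printf("80x25: %d\n", vga_resize_fits(80, 25, vram));		/* 1 */
    	printf("8192x8192: %d\n", vga_resize_fits(8192, 8192, vram));	/* 0 */
    	return 0;
    }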
diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c
index 4cd27e5172a1..8cf39d98b2bd 100644
--- a/drivers/video/fbdev/hyperv_fb.c
+++ b/drivers/video/fbdev/hyperv_fb.c
@@ -31,6 +31,16 @@
 * "set-vmvideo" command. For example
 *     set-vmvideo -vmname name -horizontalresolution:1920 \
 * -verticalresolution:1200 -resolutiontype single
+ *
+ * Gen 1 VMs also support directly using the VM's physical memory as the
+ * framebuffer, which can improve framebuffer efficiency and overall VM
+ * performance. This requires allocating contiguous physical memory from
+ * the kernel's CMA allocator. To enable it, pass a kernel parameter that
+ * gives the CMA allocator enough memory for the framebuffer, for example:
+ *     cma=130m
+ * This gives the CMA allocator 130 MB that can be handed to the
+ * framebuffer. For reference, an 8K resolution (7680x4320) framebuffer
+ * takes about 127 MB of memory.
 */

 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -228,7 +238,6 @@ struct synthvid_msg {
 } __packed;


-
 /* FB driver definitions and structures */
 #define HVFB_WIDTH 1152 /* default screen width */
 #define HVFB_HEIGHT 864 /* default screen height */
@@ -258,12 +267,15 @@ struct hvfb_par {
 	/* If true, the VSC notifies the VSP on every framebuffer change */
 	bool synchronous_fb;

+	/* If true, need to copy from deferred IO mem to framebuffer mem */
+	bool need_docopy;
+
 	struct notifier_block hvfb_panic_nb;

 	/* Memory for deferred IO and frame buffer itself */
 	unsigned char *dio_vp;
 	unsigned char *mmio_vp;
-	unsigned long mmio_pp;
+	phys_addr_t mmio_pp;

 	/* Dirty rectangle, protected by delayed_refresh_lock */
 	int x1, y1, x2, y2;
@@ -434,7 +446,7 @@ static void synthvid_deferred_io(struct fb_info *p,
 		maxy = max_t(int, maxy, y2);

 		/* Copy from dio space to mmio address */
-		if (par->fb_ready)
+		if (par->fb_ready && par->need_docopy)
 			hvfb_docopy(par, start, PAGE_SIZE);
 	}

@@ -751,12 +763,12 @@ static void hvfb_update_work(struct work_struct *w)
 		return;

 	/* Copy the dirty rectangle to frame buffer memory */
-	for (j = y1; j < y2; j++) {
-		hvfb_docopy(par,
-			    j * info->fix.line_length +
-			    (x1 * screen_depth / 8),
-			    (x2 - x1) * screen_depth / 8);
-	}
+	if (par->need_docopy)
+		for (j = y1; j < y2; j++)
+			hvfb_docopy(par,
+				    j * info->fix.line_length +
+				    (x1 * screen_depth / 8),
+				    (x2 - x1) * screen_depth / 8);

 	/* Refresh */
 	if (par->fb_ready && par->update)
@@ -801,7 +813,8 @@ static int hvfb_on_panic(struct notifier_block *nb,
 	par = container_of(nb, struct hvfb_par, hvfb_panic_nb);
 	par->synchronous_fb = true;
 	info = par->info;
-	hvfb_docopy(par, 0, dio_fb_size);
+	if (par->need_docopy)
+		hvfb_docopy(par, 0, dio_fb_size);
 	synthvid_update(info, 0, 0, INT_MAX, INT_MAX);

 	return NOTIFY_DONE;
@@ -940,6 +953,62 @@ static void hvfb_get_option(struct fb_info *info)
 	return;
 }

+/*
+ * Allocate enough contiguous physical memory.
+ * Return the physical address on success, or -1 on failure.
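The arithmetic behind the 127 MB figure in the new hyperv_fb header comment checks out: an 8K framebuffer at 32 bits per pixel needs 7680 * 4320 * 4 = 132,710,400 bytes, about 126.6 MiB, so cma=130m leaves a little headroom. A quick standalone sanity check:

    #include <stdio.h>

    /* Sketch: back-of-envelope framebuffer sizing for the cma= parameter. */
    int main(void)
    {
    	unsigned long long bytes = 7680ULL * 4320 * 4;	/* 8K, 32 bpp */

    	printf("%llu bytes = %.1f MiB\n", bytes, bytes / (1024.0 * 1024.0));
    	/* prints: 132710400 bytes = 126.6 MiB */
    	return 0;
    }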
+ */ +static phys_addr_t hvfb_get_phymem(struct hv_device *hdev, + unsigned int request_size) +{ + struct page *page = NULL; + dma_addr_t dma_handle; + void *vmem; + phys_addr_t paddr = 0; + unsigned int order = get_order(request_size); + + if (request_size == 0) + return -1; + + if (order < MAX_ORDER) { + /* Call alloc_pages if the size is less than 2^MAX_ORDER */ + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!page) + return -1; + + paddr = (page_to_pfn(page) << PAGE_SHIFT); + } else { + /* Allocate from CMA */ + hdev->device.coherent_dma_mask = DMA_BIT_MASK(64); + + vmem = dma_alloc_coherent(&hdev->device, + round_up(request_size, PAGE_SIZE), + &dma_handle, + GFP_KERNEL | __GFP_NOWARN); + + if (!vmem) + return -1; + + paddr = virt_to_phys(vmem); + } + + return paddr; +} + +/* Release contiguous physical memory */ +static void hvfb_release_phymem(struct hv_device *hdev, + phys_addr_t paddr, unsigned int size) +{ + unsigned int order = get_order(size); + + if (order < MAX_ORDER) + __free_pages(pfn_to_page(paddr >> PAGE_SHIFT), order); + else + dma_free_coherent(&hdev->device, + round_up(size, PAGE_SIZE), + phys_to_virt(paddr), + paddr); +} + /* Get framebuffer memory from Hyper-V video pci space */ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) @@ -949,22 +1018,61 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) void __iomem *fb_virt; int gen2vm = efi_enabled(EFI_BOOT); resource_size_t pot_start, pot_end; + phys_addr_t paddr; int ret; - dio_fb_size = - screen_width * screen_height * screen_depth / 8; + info->apertures = alloc_apertures(1); + if (!info->apertures) + return -ENOMEM; - if (gen2vm) { - pot_start = 0; - pot_end = -1; - } else { + if (!gen2vm) { pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, - PCI_DEVICE_ID_HYPERV_VIDEO, NULL); + PCI_DEVICE_ID_HYPERV_VIDEO, NULL); if (!pdev) { pr_err("Unable to find PCI Hyper-V video\n"); + kfree(info->apertures); return -ENODEV; } + info->apertures->ranges[0].base = pci_resource_start(pdev, 0); + info->apertures->ranges[0].size = pci_resource_len(pdev, 0); + + /* + * For Gen 1 VM, we can directly use the contiguous memory + * from VM. If we succeed, deferred IO happens directly + * on this allocated framebuffer memory, avoiding extra + * memory copy. + */ + paddr = hvfb_get_phymem(hdev, screen_fb_size); + if (paddr != (phys_addr_t) -1) { + par->mmio_pp = paddr; + par->mmio_vp = par->dio_vp = __va(paddr); + + info->fix.smem_start = paddr; + info->fix.smem_len = screen_fb_size; + info->screen_base = par->mmio_vp; + info->screen_size = screen_fb_size; + + par->need_docopy = false; + goto getmem_done; + } + pr_info("Unable to allocate enough contiguous physical memory on Gen 1 VM. Using MMIO instead.\n"); + } else { + info->apertures->ranges[0].base = screen_info.lfb_base; + info->apertures->ranges[0].size = screen_info.lfb_size; + } + + /* + * Cannot use the contiguous physical memory. + * Allocate mmio space for framebuffer. 
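hvfb_get_phymem() above picks its allocator by order: get_order(size) returns the smallest n such that PAGE_SIZE << n >= size, and alloc_pages() serves only orders below MAX_ORDER (11 by default, i.e. a 4 MiB block on 4 KiB pages), so anything larger falls through to the CMA-backed dma_alloc_coherent() path. A worked example of the mapping, assuming 4 KiB pages; the helper mirrors get_order() semantics but is not the kernel implementation:

    #include <stdio.h>

    #define PAGE_SHIFT 12	/* 4 KiB pages assumed */

    /* Sketch: smallest order n with (PAGE_SIZE << n) >= size. */
    static unsigned int order_of(unsigned long long size)
    {
    	unsigned int order = 0;

    	while ((1ULL << (PAGE_SHIFT + order)) < size)
    		order++;
    	return order;
    }

    int main(void)
    {
    	/* 64 KiB -> order 4: within alloc_pages() reach */
    	printf("64 KiB  -> order %u\n", order_of(64ULL << 10));
    	/* 128 MiB -> order 15: >= MAX_ORDER, so take the CMA path */
    	printf("128 MiB -> order %u\n", order_of(128ULL << 20));
    	return 0;
    }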
+ */ + dio_fb_size = + screen_width * screen_height * screen_depth / 8; + + if (gen2vm) { + pot_start = 0; + pot_end = -1; + } else { if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || pci_resource_len(pdev, 0) < screen_fb_size) { pr_err("Resource not available or (0x%lx < 0x%lx)\n", @@ -993,20 +1101,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) if (par->dio_vp == NULL) goto err3; - info->apertures = alloc_apertures(1); - if (!info->apertures) - goto err4; - - if (gen2vm) { - info->apertures->ranges[0].base = screen_info.lfb_base; - info->apertures->ranges[0].size = screen_info.lfb_size; - remove_conflicting_framebuffers(info->apertures, - KBUILD_MODNAME, false); - } else { - info->apertures->ranges[0].base = pci_resource_start(pdev, 0); - info->apertures->ranges[0].size = pci_resource_len(pdev, 0); - } - /* Physical address of FB device */ par->mmio_pp = par->mem->start; /* Virtual address of FB device */ @@ -1017,13 +1111,15 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) info->screen_base = par->dio_vp; info->screen_size = dio_fb_size; +getmem_done: + remove_conflicting_framebuffers(info->apertures, + KBUILD_MODNAME, false); if (!gen2vm) pci_dev_put(pdev); + kfree(info->apertures); return 0; -err4: - vfree(par->dio_vp); err3: iounmap(fb_virt); err2: @@ -1032,18 +1128,25 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) err1: if (!gen2vm) pci_dev_put(pdev); + kfree(info->apertures); return -ENOMEM; } /* Release the framebuffer */ -static void hvfb_putmem(struct fb_info *info) +static void hvfb_putmem(struct hv_device *hdev, struct fb_info *info) { struct hvfb_par *par = info->par; - vfree(par->dio_vp); - iounmap(info->screen_base); - vmbus_free_mmio(par->mem->start, screen_fb_size); + if (par->need_docopy) { + vfree(par->dio_vp); + iounmap(info->screen_base); + vmbus_free_mmio(par->mem->start, screen_fb_size); + } else { + hvfb_release_phymem(hdev, info->fix.smem_start, + screen_fb_size); + } + par->mem = NULL; } @@ -1062,6 +1165,7 @@ static int hvfb_probe(struct hv_device *hdev, par = info->par; par->info = info; par->fb_ready = false; + par->need_docopy = true; init_completion(&par->wait); INIT_DELAYED_WORK(&par->dwork, hvfb_update_work); @@ -1147,7 +1251,7 @@ static int hvfb_probe(struct hv_device *hdev, error: fb_deferred_io_cleanup(info); - hvfb_putmem(info); + hvfb_putmem(hdev, info); error2: vmbus_close(hdev->channel); error1: @@ -1177,7 +1281,7 @@ static int hvfb_remove(struct hv_device *hdev) vmbus_close(hdev->channel); hv_set_drvdata(hdev, NULL); - hvfb_putmem(info); + hvfb_putmem(hdev, info); framebuffer_release(info); return 0; diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index 1410f476e135..1fc50fc0694b 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -766,8 +766,8 @@ static int pxa168fb_probe(struct platform_device *pdev) failed_free_clk: clk_disable_unprepare(fbi->clk); failed_free_fbmem: - dma_free_coherent(fbi->dev, info->fix.smem_len, - info->screen_base, fbi->fb_start_dma); + dma_free_wc(fbi->dev, info->fix.smem_len, + info->screen_base, fbi->fb_start_dma); failed_free_info: kfree(info); @@ -801,7 +801,7 @@ static int pxa168fb_remove(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); - dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), + dma_free_wc(fbi->dev, info->fix.smem_len, info->screen_base, info->fix.smem_start); clk_disable_unprepare(fbi->clk); diff --git a/drivers/virtio/virtio_balloon.c 
b/drivers/virtio/virtio_balloon.c index 93f995f6cf36..341458fd95ca 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -158,6 +158,8 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; + BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); + /* * Set balloon pfns pointing at this page. * Note that the first pfn points at start of the page. @@ -475,7 +477,9 @@ static int init_vqs(struct virtio_balloon *vb) names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate"; callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack; names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate"; + callbacks[VIRTIO_BALLOON_VQ_STATS] = NULL; names[VIRTIO_BALLOON_VQ_STATS] = NULL; + callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL; if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { @@ -899,8 +903,7 @@ static int virtballoon_probe(struct virtio_device *vdev) vb->vb_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb); if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); - kern_unmount(balloon_mnt); - goto out_del_vqs; + goto out_kern_unmount; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif @@ -911,13 +914,13 @@ static int virtballoon_probe(struct virtio_device *vdev) */ if (virtqueue_get_vring_size(vb->free_page_vq) < 2) { err = -ENOSPC; - goto out_del_vqs; + goto out_iput; } vb->balloon_wq = alloc_workqueue("balloon-wq", WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0); if (!vb->balloon_wq) { err = -ENOMEM; - goto out_del_vqs; + goto out_iput; } INIT_WORK(&vb->report_free_page_work, report_free_page_func); vb->cmd_id_received_cache = VIRTIO_BALLOON_CMD_ID_STOP; @@ -951,7 +954,13 @@ static int virtballoon_probe(struct virtio_device *vdev) out_del_balloon_wq: if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) destroy_workqueue(vb->balloon_wq); +out_iput: +#ifdef CONFIG_BALLOON_COMPACTION + iput(vb->vb_dev_info.inode); +out_kern_unmount: + kern_unmount(balloon_mnt); out_del_vqs: +#endif vdev->config->del_vqs(vdev); out_free_vb: kfree(vb); @@ -966,6 +975,10 @@ static void remove_common(struct virtio_balloon *vb) leak_balloon(vb, vb->num_pages); update_balloon_size(vb); + /* There might be free pages that are being reported: release them. */ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) + return_free_pages_to_mm(vb, ULONG_MAX); + /* Now we reset the device so we can clean up the queues. */ vb->vdev->config->reset(vb->vdev); diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index f2862f66c2ac..222d630c41fc 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -294,7 +294,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) - if (callbacks[i]) + if (names[i] && callbacks[i]) ++nvectors; } else { /* Second best: one for change, shared for all vqs. 
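Both virtio hunks above guard the same invariant: callbacks[] and names[] are parallel stack arrays handed to the find-vqs machinery, and any slot never assigned holds stack garbage rather than NULL, which is why init_vqs() now NULLs the stats and free-page callback slots and vp_find_vqs_msix() checks names[i] before callbacks[i]. A standalone sketch of the safe setup, with an illustrative queue set and a local callback typedef standing in for the kernel's vq_callback_t:

    #include <stddef.h>

    typedef void (*vq_cb_t)(void *vq);

    enum { VQ_INFLATE, VQ_DEFLATE, VQ_STATS, VQ_FREE_PAGE, VQ_MAX };

    static void ack(void *vq) { (void)vq; }

    /* Sketch: initialize every slot before handing the arrays off, so no
     * entry is left as stack garbage.
     */
    void setup_vqs(vq_cb_t callbacks[VQ_MAX], const char *names[VQ_MAX])
    {
    	for (int i = 0; i < VQ_MAX; i++) {
    		callbacks[i] = NULL;
    		names[i] = NULL;
    	}
    	callbacks[VQ_INFLATE] = ack;
    	names[VQ_INFLATE] = "inflate";
    	callbacks[VQ_DEFLATE] = ack;
    	names[VQ_DEFLATE] = "deflate";
    	/* VQ_STATS and VQ_FREE_PAGE stay NULL unless negotiated */
    }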
*/ diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 867c7ebd3f10..58b96baa8d48 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2203,10 +2203,10 @@ void vring_del_virtqueue(struct virtqueue *_vq) vq->split.queue_size_in_bytes, vq->split.vring.desc, vq->split.queue_dma_addr); - - kfree(vq->split.desc_state); } } + if (!vq->packed_ring) + kfree(vq->split.desc_state); list_del(&_vq->list); kfree(vq); } diff --git a/drivers/visorbus/visorchipset.c b/drivers/visorbus/visorchipset.c index ca752b8f495f..cb1eb7e05f87 100644 --- a/drivers/visorbus/visorchipset.c +++ b/drivers/visorbus/visorchipset.c @@ -1210,14 +1210,17 @@ static void setup_crash_devices_work_queue(struct work_struct *work) { struct controlvm_message local_crash_bus_msg; struct controlvm_message local_crash_dev_msg; - struct controlvm_message msg; + struct controlvm_message msg = { + .hdr.id = CONTROLVM_CHIPSET_INIT, + .cmd.init_chipset = { + .bus_count = 23, + .switch_count = 0, + }, + }; u32 local_crash_msg_offset; u16 local_crash_msg_count; /* send init chipset msg */ - msg.hdr.id = CONTROLVM_CHIPSET_INIT; - msg.cmd.init_chipset.bus_count = 23; - msg.cmd.init_chipset.switch_count = 0; chipset_init(&msg); /* get saved message count */ if (visorchannel_read(chipset_dev->controlvm_channel, diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c index 3208a4409e44..6a1bc284f297 100644 --- a/drivers/vme/bridges/vme_fake.c +++ b/drivers/vme/bridges/vme_fake.c @@ -414,8 +414,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr, } } -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u8 retval = 0xff; int i; @@ -446,8 +447,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u16 retval = 0xffff; int i; @@ -478,8 +480,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u32 retval = 0xffffffff; int i; @@ -609,8 +612,9 @@ static ssize_t fake_master_read(struct vme_master_resource *image, void *buf, return retval; } -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge, + u8 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -639,8 +643,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, } -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge, + u16 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -669,8 +674,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, } -static void 
fake_vmewrite32(struct fake_driver *bridge, u32 *buf,
-		unsigned long long addr, u32 aspace, u32 cycle)
+static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge,
+					       u32 *buf, unsigned long long addr,
+					       u32 aspace, u32 cycle)
 {
 	int i;
 	unsigned long long start, end, offset;
diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c
index e149e66a6ea9..e92f38fcb7a4 100644
--- a/drivers/watchdog/da9062_wdt.c
+++ b/drivers/watchdog/da9062_wdt.c
@@ -94,13 +94,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd)
 	struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd);
 	int ret;

-	ret = da9062_reset_watchdog_timer(wdt);
-	if (ret) {
-		dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n",
-			ret);
-		return ret;
-	}
-
 	ret = regmap_update_bits(wdt->hw->regmap,
 				 DA9062AA_CONTROL_D,
 				 DA9062AA_TWDSCALE_MASK,
diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c
index a494543d3ae1..eb47fe5ed280 100644
--- a/drivers/watchdog/qcom-wdt.c
+++ b/drivers/watchdog/qcom-wdt.c
@@ -246,7 +246,7 @@ static int qcom_wdt_probe(struct platform_device *pdev)
 	}

 	/* check if there is pretimeout support */
-	irq = platform_get_irq(pdev, 0);
+	irq = platform_get_irq_optional(pdev, 0);
 	if (irq > 0) {
 		ret = devm_request_irq(dev, irq, qcom_wdt_isr,
 				       IRQF_TRIGGER_RISING,
diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c
index a3a329011a06..25188d6bbe15 100644
--- a/drivers/watchdog/stm32_iwdg.c
+++ b/drivers/watchdog/stm32_iwdg.c
@@ -262,6 +262,24 @@ static int stm32_iwdg_probe(struct platform_device *pdev)
 	watchdog_set_nowayout(wdd, WATCHDOG_NOWAYOUT);
 	watchdog_init_timeout(wdd, 0, dev);

+	/*
+	 * If CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED is set (meaning U-Boot or
+	 * another bootloader may have left the watchdog running), we must
+	 * avoid any side effects before the user-space daemon takes over.
+	 * Since there is no way to read values back from the hardware, the
+	 * best option is to enforce running the watchdog with deterministic
+	 * values.
+	 */
+	if (IS_ENABLED(CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED)) {
+		ret = stm32_iwdg_start(wdd);
+		if (ret)
+			return ret;
+
+		/* Make sure the watchdog is serviced */
+		set_bit(WDOG_HW_RUNNING, &wdd->status);
+	}
+
 	ret = devm_watchdog_register_device(dev, wdd);
 	if (ret)
 		return ret;
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 21e8085b848b..861daf4f37b2 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -147,6 +147,25 @@ int watchdog_init_timeout(struct watchdog_device *wdd,
 }
 EXPORT_SYMBOL_GPL(watchdog_init_timeout);

+static int watchdog_reboot_notifier(struct notifier_block *nb,
+				    unsigned long code, void *data)
+{
+	struct watchdog_device *wdd;
+
+	wdd = container_of(nb, struct watchdog_device, reboot_nb);
+	if (code == SYS_DOWN || code == SYS_HALT) {
+		if (watchdog_active(wdd)) {
+			int ret;
+
+			ret = wdd->ops->stop(wdd);
+			if (ret)
+				return NOTIFY_BAD;
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int watchdog_restart_notifier(struct notifier_block *nb,
 				     unsigned long action, void *data)
 {
@@ -235,6 +254,19 @@ static int __watchdog_register_device(struct watchdog_device *wdd)
 		}
 	}

+	if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
+		wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
+
+		ret = register_reboot_notifier(&wdd->reboot_nb);
+		if (ret) {
+			pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
+			       wdd->id, ret);
+			watchdog_dev_unregister(wdd);
+			ida_simple_remove(&watchdog_ida, id);
+			return ret;
+		}
+	}
+
 	if (wdd->ops->restart) {
 		wdd->restart_nb.notifier_call = watchdog_restart_notifier;
@@ -289,6 +321,9 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd)
 	if (wdd->ops->restart)
 		unregister_restart_handler(&wdd->restart_nb);

+	if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status))
+		unregister_reboot_notifier(&wdd->reboot_nb);
+
 	watchdog_dev_unregister(wdd);
 	ida_simple_remove(&watchdog_ida, wdd->id);
 }
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 4b2a85438478..8b5c742f24e8 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -38,7 +38,6 @@
 #include <linux/miscdevice.h>	/* For handling misc devices */
 #include <linux/module.h>	/* For module stuff/...
 */
 #include <linux/mutex.h>	/* For mutexes */
-#include <linux/reboot.h>	/* For reboot notifier */
 #include <linux/slab.h>		/* For memory functions */
 #include <linux/types.h>	/* For standard types (like size_t) */
 #include <linux/watchdog.h>	/* For watchdog specific items */
@@ -1097,25 +1096,6 @@ static void watchdog_cdev_unregister(struct watchdog_device *wdd)
 	put_device(&wd_data->dev);
 }

-static int watchdog_reboot_notifier(struct notifier_block *nb,
-				    unsigned long code, void *data)
-{
-	struct watchdog_device *wdd;
-
-	wdd = container_of(nb, struct watchdog_device, reboot_nb);
-	if (code == SYS_DOWN || code == SYS_HALT) {
-		if (watchdog_active(wdd)) {
-			int ret;
-
-			ret = wdd->ops->stop(wdd);
-			if (ret)
-				return NOTIFY_BAD;
-		}
-	}
-
-	return NOTIFY_DONE;
-}
-
 /*
  *	watchdog_dev_register: register a watchdog device
  *	@wdd: watchdog device
@@ -1134,22 +1114,8 @@ int watchdog_dev_register(struct watchdog_device *wdd)
 		return ret;

 	ret = watchdog_register_pretimeout(wdd);
-	if (ret) {
+	if (ret)
 		watchdog_cdev_unregister(wdd);
-		return ret;
-	}
-
-	if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) {
-		wdd->reboot_nb.notifier_call = watchdog_reboot_notifier;
-
-		ret = devm_register_reboot_notifier(&wdd->wd_data->dev,
-						    &wdd->reboot_nb);
-		if (ret) {
-			pr_err("watchdog%d: Cannot register reboot notifier (%d)\n",
-			       wdd->id, ret);
-			watchdog_dev_unregister(wdd);
-		}
-	}

 	return ret;
 }
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
index b069349b52f5..3065dd670a18 100644
--- a/drivers/watchdog/wdat_wdt.c
+++ b/drivers/watchdog/wdat_wdt.c
@@ -54,6 +54,13 @@ module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 		 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");

+#define WDAT_DEFAULT_TIMEOUT	30
+
+static int timeout = WDAT_DEFAULT_TIMEOUT;
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+		 __MODULE_STRING(WDAT_DEFAULT_TIMEOUT) ")");
+
 static int wdat_wdt_read(struct wdat_wdt *wdat,
 	 const struct wdat_instruction *instr, u32 *value)
 {
@@ -389,7 +396,7 @@ static int wdat_wdt_probe(struct platform_device *pdev)

 		memset(&r, 0, sizeof(r));
 		r.start = gas->address;
-		r.end = r.start + gas->access_width - 1;
+		r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1;
 		if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
 			r.flags = IORESOURCE_MEM;
 		} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
@@ -438,6 +445,22 @@ static int wdat_wdt_probe(struct platform_device *pdev)

 	platform_set_drvdata(pdev, wdat);

+	/*
+	 * Set initial timeout so that userspace has time to configure the
+	 * watchdog properly after it has opened the device. In some cases
+	 * the BIOS default is too short and causes immediate reboot.
+	 */
+	if (timeout * 1000 < wdat->wdd.min_hw_heartbeat_ms ||
+	    timeout * 1000 > wdat->wdd.max_hw_heartbeat_ms) {
+		dev_warn(dev, "Invalid timeout %d given, using %d\n",
+			 timeout, WDAT_DEFAULT_TIMEOUT);
+		timeout = WDAT_DEFAULT_TIMEOUT;
+	}
+
+	ret = wdat_wdt_set_timeout(&wdat->wdd, timeout);
+	if (ret)
+		return ret;
+
 	watchdog_set_nowayout(&wdat->wdd, nowayout);
 	return devm_watchdog_register_device(dev, &wdat->wdd);
 }
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 4fc83e3f5ad3..0258415ca0b2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1006,19 +1006,19 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 	}
 	mutex_unlock(&priv->lock);

-	/*
-	 * gntdev takes the address of the PTE in find_grant_ptes() and passes
-	 * it to the hypervisor in gntdev_map_grant_pages().
The purpose of - * the notifier is to prevent the hypervisor pointer to the PTE from - * going stale. - * - * Since this vma's mappings can't be touched without the mmap_sem, - * and we are holding it now, there is no need for the notifier_range - * locking pattern. - */ - mmu_interval_read_begin(&map->notifier); - if (use_ptemod) { + /* + * gntdev takes the address of the PTE in find_grant_ptes() and + * passes it to the hypervisor in gntdev_map_grant_pages(). The + * purpose of the notifier is to prevent the hypervisor pointer + * to the PTE from going stale. + * + * Since this vma's mappings can't be touched without the + * mmap_sem, and we are holding it now, there is no need for + * the notifier_range locking pattern. + */ + mmu_interval_read_begin(&map->notifier); + map->pages_vm_start = vma->vm_start; err = apply_to_page_range(vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index 8b9919c26095..456a164364a2 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -33,7 +33,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void) * cpu. */ __this_cpu_write(xen_in_preemptible_hcall, false); - _cond_resched(); + local_irq_enable(); + cond_resched(); + local_irq_disable(); __this_cpu_write(xen_in_preemptible_hcall, true); } } diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 6d12fc368210..a8d24433c8e9 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch, "%llu", &static_max) == 1)) static_max >>= PAGE_SHIFT - 10; else - static_max = new_target; + static_max = balloon_stats.current_pages; target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index d239fc3c5e3d..eb5151fc8efa 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -313,6 +313,8 @@ static int process_msg(void) req->msg.type = state.msg.type; req->msg.len = state.msg.len; req->body = state.body; + /* write body, then update state */ + virt_wmb(); req->state = xb_req_state_got_reply; req->cb(req); } else @@ -395,6 +397,8 @@ static int process_writes(void) if (state.req->state == xb_req_state_aborted) kfree(state.req); else { + /* write err, then update state */ + virt_wmb(); state.req->state = xb_req_state_got_reply; wake_up(&state.req->wq); } diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index ddc18da61834..3a06eb699f33 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -191,8 +191,11 @@ static bool xenbus_ok(void) static bool test_reply(struct xb_req_data *req) { - if (req->state == xb_req_state_got_reply || !xenbus_ok()) + if (req->state == xb_req_state_got_reply || !xenbus_ok()) { + /* read req->state before all other fields */ + virt_rmb(); return true; + } /* Make sure to reread req->state each time. 
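The xenbus_comms.c hunks and the test_reply() hunk above form a barrier pair: the writer publishes req->body or req->err and only then sets req->state behind virt_wmb(), while the reader observes req->state and only then reads the other fields behind virt_rmb(). The same publish/consume discipline, sketched with C11 release/acquire atomics standing in for the kernel's virt_*mb() helpers:

    #include <stdatomic.h>

    /* Sketch: writer publishes the payload before the flag; reader checks
     * the flag before touching the payload. got_reply must start at 0.
     */
    struct req {
    	int body;			/* payload */
    	atomic_int got_reply;		/* 0 = pending, 1 = reply ready */
    };

    void writer(struct req *r, int value)
    {
    	r->body = value;		/* write body ... */
    	/* ... then update state: release orders the body write first */
    	atomic_store_explicit(&r->got_reply, 1, memory_order_release);
    }

    int reader(struct req *r)
    {
    	/* acquire: once the flag is seen, the body write is visible too */
    	while (!atomic_load_explicit(&r->got_reply, memory_order_acquire))
    		;			/* spin (the kernel sleeps instead) */
    	return r->body;
    }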
*/ barrier(); @@ -202,7 +205,7 @@ static bool test_reply(struct xb_req_data *req) static void *read_reply(struct xb_req_data *req) { - while (req->state != xb_req_state_got_reply) { + do { wait_event(req->wq, test_reply(req)); if (!xenbus_ok()) @@ -216,7 +219,7 @@ static void *read_reply(struct xb_req_data *req) if (req->err) return ERR_PTR(req->err); - } + } while (req->state != xb_req_state_got_reply); return req->body; } diff --git a/fs/aio.c b/fs/aio.c index a9fbad2ce5e6..5f3d3d814928 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1610,6 +1610,14 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb, return 0; } +static void aio_poll_put_work(struct work_struct *work) +{ + struct poll_iocb *req = container_of(work, struct poll_iocb, work); + struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); + + iocb_put(iocb); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1674,6 +1682,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del_init(&req->wait.entry); if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + struct kioctx *ctx = iocb->ki_ctx; + /* * Try to complete the iocb inline if we can. Use * irqsave/irqrestore because not all filesystems (e.g. fuse) @@ -1683,8 +1693,14 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); req->done = true; - spin_unlock_irqrestore(&iocb->ki_ctx->ctx_lock, flags); - iocb_put(iocb); + if (iocb->ki_eventfd && eventfd_signal_count()) { + iocb = NULL; + INIT_WORK(&req->work, aio_poll_put_work); + schedule_work(&req->work); + } + spin_unlock_irqrestore(&ctx->ctx_lock, flags); + if (iocb) + iocb_put(iocb); } else { schedule_work(&req->work); } diff --git a/fs/attr.c b/fs/attr.c index df28035aa23e..b4bbdbd4c8ca 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -183,18 +183,12 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) { - inode->i_atime = timestamp_truncate(attr->ia_atime, - inode); - } - if (ia_valid & ATTR_MTIME) { - inode->i_mtime = timestamp_truncate(attr->ia_mtime, - inode); - } - if (ia_valid & ATTR_CTIME) { - inode->i_ctime = timestamp_truncate(attr->ia_ctime, - inode); - } + if (ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -268,8 +262,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de attr->ia_ctime = now; if (!(ia_valid & ATTR_ATIME_SET)) attr->ia_atime = now; + else + attr->ia_atime = timestamp_truncate(attr->ia_atime, inode); if (!(ia_valid & ATTR_MTIME_SET)) attr->ia_mtime = now; + else + attr->ia_mtime = timestamp_truncate(attr->ia_mtime, inode); + if (ia_valid & ATTR_KILL_PRIV) { error = security_inode_need_killpriv(dentry); if (error < 0) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 6934a5b8708f..acf0b7d879bc 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -849,9 +849,9 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) found_raid1c34 = true; up_read(&sinfo->groups_sem); } - if (found_raid56) + if (!found_raid56) btrfs_clear_fs_incompat(fs_info, 
RAID56); - if (found_raid1c34) + if (!found_raid1c34) btrfs_clear_fs_incompat(fs_info, RAID1C34); } } diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 0b52ab4cb964..72c70f59fc60 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -629,7 +629,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_info *fs_info = state->fs_info; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -700,7 +699,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, break; } - num_copies = btrfs_num_copies(fs_info, next_bytenr, + num_copies = btrfs_num_copies(state->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) pr_info("num_copies(log_bytenr=%llu) = %d\n", diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 24658b5a5787..f2ec1a9bae28 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -326,12 +326,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) { write_lock(&fs_info->tree_mod_log_lock); - spin_lock(&fs_info->tree_mod_seq_lock); if (!elem->seq) { elem->seq = btrfs_inc_tree_mod_seq(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } - spin_unlock(&fs_info->tree_mod_seq_lock); write_unlock(&fs_info->tree_mod_log_lock); return elem->seq; @@ -351,7 +349,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, if (!seq_putting) return; - spin_lock(&fs_info->tree_mod_seq_lock); + write_lock(&fs_info->tree_mod_log_lock); list_del(&elem->list); elem->seq = 0; @@ -362,19 +360,17 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * blocker with lower sequence number exists, we * cannot remove anything from the log */ - spin_unlock(&fs_info->tree_mod_seq_lock); + write_unlock(&fs_info->tree_mod_log_lock); return; } min_seq = cur_elem->seq; } } - spin_unlock(&fs_info->tree_mod_seq_lock); /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. */ - write_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 54efb21c2727..2e9f938508e9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -673,14 +673,12 @@ struct btrfs_fs_info { atomic_t nr_delayed_iputs; wait_queue_head_t delayed_iputs_wait; - /* this protects tree_mod_seq_list */ - spinlock_t tree_mod_seq_lock; atomic64_t tree_mod_seq; - struct list_head tree_mod_seq_list; - /* this protects tree_mod_log */ + /* this protects tree_mod_log and tree_mod_seq_list */ rwlock_t tree_mod_log_lock; struct rb_root tree_mod_log; + struct list_head tree_mod_seq_list; atomic_t async_delalloc_pages; @@ -3110,17 +3108,21 @@ do { \ rcu_read_unlock(); \ } while (0) -__cold -static inline void assfail(const char *expr, const char *file, int line) +#ifdef CONFIG_BTRFS_ASSERT +__cold __noreturn +static inline void assertfail(const char *expr, const char *file, int line) { - if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) { - pr_err("assertion failed: %s, in %s:%d\n", expr, file, line); - BUG(); - } + pr_err("assertion failed: %s, in %s:%d\n", expr, file, line); + BUG(); } -#define ASSERT(expr) \ - (likely(expr) ? 
(void)0 : assfail(#expr, __FILE__, __LINE__)) +#define ASSERT(expr) \ + (likely(expr) ? (void)0 : assertfail(#expr, __FILE__, __LINE__)) + +#else +static inline void assertfail(const char *expr, const char* file, int line) { } +#define ASSERT(expr) (void)(expr) +#endif /* * Use that for functions that are conditionally exported for sanity tests but diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index df3bd880061d..dfdb7d4f8406 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -492,7 +492,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, if (head->is_data) return; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { struct seq_list *elem; @@ -500,7 +500,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct seq_list, list); seq = elem->seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); again: for (node = rb_first_cached(&head->ref_tree); node; @@ -518,7 +518,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq) struct seq_list *elem; int ret = 0; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); @@ -531,7 +531,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq) } } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0edfdc9c82b..3bb4bc2c9fd1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2691,7 +2691,6 @@ int __cold open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); @@ -3165,6 +3164,7 @@ int __cold open_ctree(struct super_block *sb, /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { + btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3200,6 +3200,7 @@ int __cold open_ctree(struct super_block *sb, if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); btrfs_warn(fs_info, "failed to read fs tree: %d", err); + fs_info->fs_root = NULL; goto fail_qgroup; } @@ -4026,11 +4027,18 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - btrfs_free_block_groups(fs_info); - clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); + /* + * We must free the block groups after dropping the fs_roots as we could + * have had an IO error and have left over tree log blocks that aren't + * cleaned up until the fs roots are freed. This makes the block group + * accounting appear to be wrong because there's pending reserved bytes, + * so make sure we do the block group cleanup afterwards. 
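A note on the ctree.h ASSERT() rework above: with CONFIG_BTRFS_ASSERT off, the macro compiles to (void)(expr) rather than to nothing, so variables used only inside assertions do not trigger unused warnings; the expression is still evaluated, so any side effects survive. A standalone sketch of the same two-mode macro, under a hypothetical MY_DEBUG switch:

    #include <stdio.h>
    #include <stdlib.h>

    /* Sketch: assertion that either aborts with location info or compiles
     * down to an evaluated-but-ignored expression.
     */
    #ifdef MY_DEBUG
    #define MY_ASSERT(expr)						\
    	((expr) ? (void)0						\
    		: (fprintf(stderr, "assertion failed: %s, in %s:%d\n",	\
    			   #expr, __FILE__, __LINE__), abort()))
    #else
    /* still evaluates expr, so assert-only variables stay "used" */
    #define MY_ASSERT(expr) ((void)(expr))
    #endif

    int main(void)
    {
    	int ret = 0;

    	MY_ASSERT(ret == 0);	/* no unused-variable warning either way */
    	return ret;
    }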
+ */ + btrfs_free_block_groups(fs_info); + iput(fs_info->btree_inode); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY @@ -4265,6 +4273,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, cond_resched(); spin_lock(&delayed_refs->lock); } + btrfs_qgroup_destroy_extent_records(trans); spin_unlock(&delayed_refs->lock); @@ -4490,7 +4499,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, wake_up(&fs_info->transaction_wait); btrfs_destroy_delayed_inodes(fs_info); - btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 274318e9114e..f50341ce5d44 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4430,6 +4430,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, offset, ins, 1); + if (ret) + btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1); btrfs_put_block_group(block_group); return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2f4802f405a2..5e7d6e3463ab 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1593,21 +1593,25 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, /* Find first extent with bits cleared */ while (1) { node = __etree_search(tree, start, &next, &prev, NULL, NULL); - if (!node) { + if (!node && !next && !prev) { + /* + * Tree is completely empty, send full range and let + * caller deal with it + */ + *start_ret = 0; + *end_ret = -1; + goto out; + } else if (!node && !next) { + /* + * We are past the last allocated chunk, set start at + * the end of the last extent. + */ + state = rb_entry(prev, struct extent_state, rb_node); + *start_ret = state->end + 1; + *end_ret = -1; + goto out; + } else if (!node) { node = next; - if (!node) { - /* - * We are past the last allocated chunk, - * set start at the end of the last extent. The - * device alloc tree should never be empty so - * prev is always set. - */ - ASSERT(prev); - state = rb_entry(prev, struct extent_state, rb_node); - *start_ret = state->end + 1; - *end_ret = -1; - goto out; - } } /* * At this point 'node' either contains 'start' or start is @@ -3941,6 +3945,11 @@ int btree_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; + /* + * Start from the beginning does not need to cycle over the + * range, mark it as scanned. + */ + scanned = (index == 0); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; @@ -3958,7 +3967,6 @@ int btree_write_cache_pages(struct address_space *mapping, tag))) { unsigned i; - scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -4087,6 +4095,11 @@ static int extent_write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ end = -1; + /* + * Start from the beginning does not need to cycle over the + * range, mark it as scanned. 
+ */ + scanned = (index == 0); } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; @@ -4120,7 +4133,6 @@ static int extent_write_cache_pages(struct address_space *mapping, &index, end, tag))) { unsigned i; - scanned = 1; for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; @@ -4180,7 +4192,16 @@ static int extent_write_cache_pages(struct address_space *mapping, */ scanned = 1; index = 0; - goto retry; + + /* + * If we're looping we could run into a page that is locked by a + * writer and that writer could be waiting on writeback for a + * page in our current bio, and thus deadlock, so flush the + * write bio here. + */ + ret = flush_write_bio(epd); + if (!ret) + goto retry; } if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole)) diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 6f417ff68980..bd6229fb2b6f 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -237,6 +237,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) struct extent_map *merge = NULL; struct rb_node *rb; + /* + * We can't modify an extent map that is in the tree and that is being + * used by another task, as it can cause that other task to see it in + * inconsistent state during the merging. We always have 1 reference for + * the tree and 1 for this task (which is unpinning the extent map or + * clearing the logging flag), so anything > 2 means it's being used by + * other tasks too. + */ + if (refcount_read(&em->refs) > 2) + return; + if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b1bfdc5c1387..6f18333e83c3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -274,7 +274,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio csum += count * csum_size; nblocks -= count; next: - while (count--) { + while (count > 0) { + count--; disk_bytenr += fs_info->sectorsize; offset += fs_info->sectorsize; page_bytes_left -= fs_info->sectorsize; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c70baafb2a39..721effb468e8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2191,6 +2191,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, /* see btrfs_writepage_start_hook for details on why this is required */ struct btrfs_writepage_fixup { struct page *page; + struct inode *inode; struct btrfs_work work; }; @@ -2204,27 +2205,71 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) struct inode *inode; u64 page_start; u64 page_end; - int ret; + int ret = 0; + bool free_delalloc_space = true; fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; + inode = fixup->inode; + page_start = page_offset(page); + page_end = page_offset(page) + PAGE_SIZE - 1; + + /* + * This is similar to page_mkwrite, we need to reserve the space before + * we take the page lock. + */ + ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, + PAGE_SIZE); again: lock_page(page); + + /* + * Before we queued this fixup, we took a reference on the page. + * page->mapping may go NULL, but it shouldn't be moved to a different + * address space. 
+ */ if (!page->mapping || !PageDirty(page) || !PageChecked(page)) { - ClearPageChecked(page); + /* + * Unfortunately this is a little tricky, either + * + * 1) We got here and our page had already been dealt with and + * we reserved our space, thus ret == 0, so we need to just + * drop our space reservation and bail. This can happen the + * first time we come into the fixup worker, or could happen + * while waiting for the ordered extent. + * 2) Our page was already dealt with, but we happened to get an + * ENOSPC above from the btrfs_delalloc_reserve_space. In + * this case we obviously don't have anything to release, but + * because the page was already dealt with we don't want to + * mark the page with an error, so make sure we're resetting + * ret to 0. This is why we have this check _before_ the ret + * check, because we do not want to have a surprise ENOSPC + * when the page was already properly dealt with. + */ + if (!ret) { + btrfs_delalloc_release_extents(BTRFS_I(inode), + PAGE_SIZE); + btrfs_delalloc_release_space(inode, data_reserved, + page_start, PAGE_SIZE, + true); + } + ret = 0; goto out_page; } - inode = page->mapping->host; - page_start = page_offset(page); - page_end = page_offset(page) + PAGE_SIZE - 1; + /* + * We can't mess with the page state unless it is locked, so now that + * it is locked bail if we failed to make our space reservation. + */ + if (ret) + goto out_page; lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); /* already ordered? We're done */ if (PagePrivate2(page)) - goto out; + goto out_reserved; ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, PAGE_SIZE); @@ -2237,39 +2282,49 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) goto again; } - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, - PAGE_SIZE); - if (ret) { - mapping_set_error(page->mapping, ret); - end_extent_writepage(page, ret, page_start, page_end); - ClearPageChecked(page); - goto out; - } - ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0, &cached_state); - if (ret) { - mapping_set_error(page->mapping, ret); - end_extent_writepage(page, ret, page_start, page_end); - ClearPageChecked(page); + if (ret) goto out_reserved; - } - ClearPageChecked(page); - set_page_dirty(page); + /* + * Everything went as planned, we're now the owner of a dirty page with + * delayed allocation bits set and space reserved for our COW + * destination. + * + * The page was dirty when we started, nothing should have cleaned it. + */ + BUG_ON(!PageDirty(page)); + free_delalloc_space = false; out_reserved: btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); - if (ret) + if (free_delalloc_space) btrfs_delalloc_release_space(inode, data_reserved, page_start, PAGE_SIZE, true); -out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); out_page: + if (ret) { + /* + * We hit ENOSPC or other errors. Update the mapping and page + * to reflect the errors and clean the page. + */ + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + clear_page_dirty_for_io(page); + SetPageError(page); + } + ClearPageChecked(page); unlock_page(page); put_page(page); kfree(fixup); extent_changeset_free(data_reserved); + /* + * As a precaution, do a delayed iput in case it would be the last iput + * that could need flushing space. Recursing back to fixup worker would + * deadlock. 
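The restructured fixup worker follows the same ordering rule as page_mkwrite(): reserve space first, take the page lock second, because the reservation may block on flushing and flushing may itself need page locks. A schematic of that rule with stub types and helpers; every name here is a hypothetical stand-in, not btrfs API:

    #include <stdbool.h>

    struct page_ctx { bool handled; };

    static int  reserve_space(struct page_ctx *c)  { (void)c; return 0; }
    static void release_space(struct page_ctx *c)  { (void)c; }
    static void lock_page_ctx(struct page_ctx *c)  { (void)c; }
    static void unlock_page_ctx(struct page_ctx *c){ (void)c; }
    static int  mark_delalloc(struct page_ctx *c)  { (void)c; return 0; }

    /* Sketch: reservation (may flush) strictly before the page lock. */
    int fixup_page(struct page_ctx *ctx)
    {
    	int ret = reserve_space(ctx);	/* no page lock held yet */

    	lock_page_ctx(ctx);
    	if (ctx->handled) {		/* someone else finished the page */
    		if (!ret)
    			release_space(ctx);	/* return unused reservation */
    		ret = 0;
    	} else if (!ret) {
    		ret = mark_delalloc(ctx);	/* consume the reservation */
    	}
    	unlock_page_ctx(ctx);
    	return ret;
    }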
+ */ + btrfs_add_delayed_iput(inode); } /* @@ -2293,6 +2348,13 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) if (TestClearPagePrivate2(page)) return 0; + /* + * PageChecked is set below when we create a fixup worker for this page, + * don't try to create another one if we're already PageChecked() + * + * The extent_io writepage code will redirty the page if we send back + * EAGAIN. + */ if (PageChecked(page)) return -EAGAIN; @@ -2300,12 +2362,21 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end) if (!fixup) return -EAGAIN; + /* + * We are already holding a reference to this inode from + * write_cache_pages. We need to hold it because the space reservation + * takes place outside of the page lock, and we can't trust + * page->mapping outside of the page lock. + */ + ihold(inode); SetPageChecked(page); get_page(page); btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; + fixup->inode = inode; btrfs_queue_work(fs_info->fixup_workers, &fixup->work); - return -EBUSY; + + return -EAGAIN; } static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, @@ -4686,6 +4757,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u64 bytes_deleted = 0; bool be_nice = false; bool should_throttle = false; + const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize); + struct extent_state *cached_state = NULL; BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); @@ -4702,6 +4775,10 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, return -ENOMEM; path->reada = READA_BACK; + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) + lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1, + &cached_state); + /* * We want to drop from the next block forward in case this new size is * not block aligned since we will be keeping the last block of the @@ -4738,7 +4815,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, goto out; } - path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); if (ret < 0) goto out; @@ -4890,7 +4966,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, root == fs_info->tree_root)) { struct btrfs_ref ref = { 0 }; - btrfs_set_path_blocking(path); bytes_deleted += extent_num_bytes; btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, @@ -4966,6 +5041,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (!ret && last_size > new_size) last_size = new_size; btrfs_ordered_update_i_size(inode, last_size, NULL); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, + (u64)-1, &cached_state); } btrfs_free_path(path); @@ -8367,6 +8444,7 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, { struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio); + u16 csum_size; blk_status_t ret; /* @@ -8386,7 +8464,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, file_offset -= dip->logical_offset; file_offset >>= inode->i_sb->s_blocksize_bits; - io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset); + csum_size = btrfs_super_csum_size(btrfs_sb(inode->i_sb)->super_copy); + io_bio->csum = orig_io_bio->csum + csum_size * file_offset; return 0; } @@ -10080,6 +10159,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx); if (ret) commit_transaction = true; + } else if 
(sync_log) { + mutex_lock(&root->log_mutex); + list_del(&ctx.list); + mutex_unlock(&root->log_mutex); } if (commit_transaction) { ret = btrfs_commit_transaction(trans); @@ -10410,6 +10493,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 clear_offset = start; u64 i_size; u64 cur_bytes; u64 last_alloc = (u64)-1; @@ -10444,6 +10528,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans); break; } + + /* + * We've reserved this space, and thus converted it from + * ->bytes_may_use to ->bytes_reserved. Any error that happens + * from here on out we will only need to clear our reservation + * for the remaining unreserved area, so advance our + * clear_offset by our extent size. + */ + clear_offset += ins.offset; btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; @@ -10523,9 +10616,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, if (own_trans) btrfs_end_transaction(trans); } - if (cur_offset < end) - btrfs_free_reserved_data_space(inode, NULL, cur_offset, - end - cur_offset + 1); + if (clear_offset < end) + btrfs_free_reserved_data_space(inode, NULL, clear_offset, + end - clear_offset + 1); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 12ae31e1813e..173758d86feb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3243,6 +3243,7 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, struct inode *dst, u64 dst_loff) { + const u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; int ret; /* @@ -3250,7 +3251,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, * source range to serialize with relocation. */ btrfs_double_extent_lock(src, loff, dst, dst_loff, len); - ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1); + ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); return ret; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index fb09bc2f8e4d..0596117202a2 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -686,10 +686,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + /* + * If the ordered extent had an error save the error but don't + * exit without waiting first for all other ordered extents in + * the range to complete. 
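The loop this comment belongs to records the first failure but still waits on every remaining ordered extent in the range. A tiny standalone analogue of that "remember the first error, keep waiting" pattern (wait_one() and the -5/-EIO stand-in are made up):

#include <stdio.h>

/* Stand-in for waiting on one ordered extent; returns 0 or a
 * negative error (here unit 1 pretends to fail with -EIO). */
static int wait_one(int i)
{
	return (i == 1) ? -5 : 0;
}

int main(void)
{
	int ret = 0;
	int i;

	for (i = 0; i < 4; i++) {
		int err = wait_one(i);

		/* Record the first error, but do not break out:
		 * every unit in the range still gets waited on. */
		if (err && !ret)
			ret = err;
	}
	printf("all units waited for, first error: %d\n", ret);
	return ret ? 1 : 0;
}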
+ */ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 39fc8c3d3a75..410b791f28a5 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -4016,3 +4016,16 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, } return ret; } + +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans) +{ + struct btrfs_qgroup_extent_record *entry; + struct btrfs_qgroup_extent_record *next; + struct rb_root *root; + + root = &trans->delayed_refs.dirty_extent_root; + rbtree_postorder_for_each_entry_safe(entry, next, root, node) { + ulist_free(entry->old_roots); + kfree(entry); + } +} diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 236f12224d52..1bc654459469 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -414,5 +414,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, u64 last_snapshot); int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb); +void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); #endif diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index b57f3618e58e..454a1015d026 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -744,6 +744,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, */ be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); if (IS_ERR(be)) { + kfree(ref); kfree(ra); ret = PTR_ERR(be); goto out; @@ -757,6 +758,8 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "re-allocated a block that still has references to it!"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); + kfree(ra); goto out_unlock; } @@ -819,6 +822,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "dropping a ref for a existing root that doesn't have a ref on the block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); kfree(ra); goto out_unlock; } @@ -834,6 +838,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info, "attempting to add another ref for an existing ref on a tree block"); dump_block_entry(fs_info, be); dump_ref_action(fs_info, ra); + kfree(ref); kfree(ra); goto out_unlock; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 091e5bc8c7ea..0b42dac8a35f 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1269,7 +1269,8 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) * destination of the stream. */ if (ino == bctx->cur_objectid && - offset >= bctx->sctx->cur_inode_next_write_offset) + offset + bctx->extent_len > + bctx->sctx->cur_inode_next_write_offset) return 0; } @@ -5804,6 +5805,18 @@ static int process_extent(struct send_ctx *sctx, } } + /* + * There might be a hole between the end of the last processed extent + * and this extent, and we may not have sent a write operation for that + * hole because it was not needed (range is beyond eof in the parent + * snapshot). So adjust the next write offset to the offset of this + * extent, as we want to make sure we don't make mistakes when checking if + * we can clone this extent from some other offset in this inode or when + * detecting if we need to issue a truncate operation when finishing the + * processing of this inode.
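A freestanding sketch of the offset adjustment described here, using invented types rather than the send context: walking extents in offset order, the bookkeeping offset jumps over any hole for which no write was emitted:

#include <stdio.h>

struct ext {
	unsigned long long off;
	unsigned long long len;
};

int main(void)
{
	/* Two extents sorted by offset, with a hole in between. */
	struct ext exts[] = { { 0, 4096 }, { 16384, 4096 } };
	unsigned long long next_write_offset = 0;
	int i;

	for (i = 0; i < 2; i++) {
		/* No write is emitted for the hole, so jump the
		 * bookkeeping offset to this extent's start before
		 * accounting for the extent itself. */
		if (exts[i].off > next_write_offset)
			next_write_offset = exts[i].off;
		next_write_offset += exts[i].len;
	}
	printf("final next_write_offset: %llu\n", next_write_offset);
	return 0;
}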
+ */ + sctx->cur_inode_next_write_offset = key->offset; + ret = find_extent_clone(sctx, path, key->objectid, key->offset, sctx->cur_inode_size, &found_clone); if (ret != -ENOENT && ret < 0) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f452a94abdc3..c6557d44907a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1803,6 +1803,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } if (btrfs_super_log_root(fs_info->super_copy) != 0) { + btrfs_warn(fs_info, + "mount required to replay tree-log, cannot remount read-write"); ret = -EINVAL; goto restore; } @@ -2104,7 +2106,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) */ thresh = SZ_4M; - if (!mixed && total_free_meta - thresh < block_rsv->size) + /* + * We only want to claim there's no available space if we can no longer + * allocate chunks for our metadata profile and our global reserve will + * not fit in the free metadata space. If we aren't ->full then we + * still can allocate chunks and thus are fine using the currently + * calculated f_bavail. + */ + if (!mixed && block_rsv->space_info->full && + total_free_meta - thresh < block_rsv->size) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index a7aca4141788..fa3d49d8e503 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -121,7 +121,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) spin_lock_init(&fs_info->qgroup_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); mutex_init(&fs_info->qgroup_ioctl_lock); mutex_init(&fs_info->qgroup_rescan_lock); rwlock_init(&fs_info->tree_mod_log_lock); diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 123d9a614357..df7ce874a74b 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -441,8 +441,17 @@ static int test_find_first_clear_extent_bit(void) int ret = -EINVAL; test_msg("running find_first_clear_extent_bit test"); + extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL); + /* Test correct handling of empty tree */ + find_first_clear_extent_bit(&tree, 0, &start, &end, CHUNK_TRIMMED); + if (start != 0 || end != -1) { + test_err( + "error getting a range from completely empty tree: start %llu end %llu", + start, end); + goto out; + } /* * Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between * 4M-32M diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cfc08ef9b876..beb6c69cd1e5 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -121,6 +121,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) BUG_ON(!list_empty(&transaction->list)); WARN_ON(!RB_EMPTY_ROOT( &transaction->delayed_refs.href_root.rb_root)); + WARN_ON(!RB_EMPTY_ROOT( + &transaction->delayed_refs.dirty_extent_root)); if (transaction->delayed_refs.pending_csums) btrfs_err(transaction->fs_info, "pending csums is %llu", @@ -147,13 +149,14 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) } } -static noinline void switch_commit_roots(struct btrfs_transaction *trans) +static noinline void switch_commit_roots(struct btrfs_trans_handle *trans) { + struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *root, *tmp; down_write(&fs_info->commit_root_sem); - list_for_each_entry_safe(root, tmp, 
&trans->switch_commits, + list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits, dirty_list) { list_del_init(&root->dirty_list); free_extent_buffer(root->commit_root); @@ -165,16 +168,17 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans) } /* We can free old roots now. */ - spin_lock(&trans->dropped_roots_lock); - while (!list_empty(&trans->dropped_roots)) { - root = list_first_entry(&trans->dropped_roots, + spin_lock(&cur_trans->dropped_roots_lock); + while (!list_empty(&cur_trans->dropped_roots)) { + root = list_first_entry(&cur_trans->dropped_roots, struct btrfs_root, root_list); list_del_init(&root->root_list); - spin_unlock(&trans->dropped_roots_lock); + spin_unlock(&cur_trans->dropped_roots_lock); + btrfs_free_log(trans, root); btrfs_drop_and_free_fs_root(fs_info, root); - spin_lock(&trans->dropped_roots_lock); + spin_lock(&cur_trans->dropped_roots_lock); } - spin_unlock(&trans->dropped_roots_lock); + spin_unlock(&cur_trans->dropped_roots_lock); up_write(&fs_info->commit_root_sem); } @@ -1421,7 +1425,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans, ret = commit_cowonly_roots(trans); if (ret) goto out; - switch_commit_roots(trans->transaction); + switch_commit_roots(trans); ret = btrfs_write_and_wait_transaction(trans); if (ret) btrfs_handle_fs_error(fs_info, ret, @@ -2013,6 +2017,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ASSERT(refcount_read(&trans->use_count) == 1); + /* + * Some places just start a transaction to commit it. We need to make + * sure that if this commit fails, the abort code actually marks the + * transaction as failed, so set trans->dirty to make the abort code do + * the right thing. + */ + trans->dirty = true; + /* Stop the commit early if ->aborted is set */ if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; @@ -2301,7 +2313,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) list_add_tail(&fs_info->chunk_root->dirty_list, &cur_trans->switch_commits); - switch_commit_roots(cur_trans); + switch_commit_roots(trans); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d3f115909ff0..f5bb59470957 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3935,7 +3935,7 @@ static int log_csums(struct btrfs_trans_handle *trans, static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *dst_path, - struct btrfs_path *src_path, u64 *last_extent, + struct btrfs_path *src_path, int start_slot, int nr, int inode_only, u64 logged_isize) { @@ -3946,7 +3946,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; - struct btrfs_key first_key, last_key, key; int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -3954,9 +3953,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, int i; struct list_head ordered_sums; int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; - bool has_extents = false; - bool need_find_last_extent = true; - bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3965,8 +3961,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!ins_data) return -ENOMEM; - first_key.objectid = (u64)-1; - ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); @@ -3987,9 +3981,6 @@ static
noinline int copy_items(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + i); - if (i == nr - 1) - last_key = ins_keys[i]; - if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_path->slots[0], @@ -4003,20 +3994,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset, ins_sizes[i]); } - /* - * We set need_find_last_extent here in case we know we were - * processing other items and then walk into the first extent in - * the inode. If we don't hit an extent then nothing changes, - * we'll do the last search the next time around. - */ - if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { - has_extents = true; - if (first_key.objectid == (u64)-1) - first_key = ins_keys[i]; - } else { - need_find_last_extent = false; - } - /* take a reference on file data extents so that truncates * or deletes of this inode don't have to relog the inode * again @@ -4082,167 +4059,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, kfree(sums); } - if (!has_extents) - return ret; - - if (need_find_last_extent && *last_extent == first_key.offset) { - /* - * We don't have any leafs between our current one and the one - * we processed before that can have file extent items for our - * inode (and have a generation number smaller than our current - * transaction id). - */ - need_find_last_extent = false; - } - - /* - * Because we use btrfs_search_forward we could skip leaves that were - * not modified and then assume *last_extent is valid when it really - * isn't. So back up to the previous leaf and read the end of the last - * extent before we go and fill in holes. - */ - if (need_find_last_extent) { - u64 len; - - ret = btrfs_prev_leaf(inode->root, src_path); - if (ret < 0) - return ret; - if (ret) - goto fill_holes; - if (src_path->slots[0]) - src_path->slots[0]--; - src = src_path->nodes[0]; - btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) - goto fill_holes; - extent = btrfs_item_ptr(src, src_path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - *last_extent = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - *last_extent = key.offset + len; - } - } -fill_holes: - /* So we did prev_leaf, now we need to move to the next leaf, but a few - * things could have happened - * - * 1) A merge could have happened, so we could currently be on a leaf - * that holds what we were copying in the first place. - * 2) A split could have happened, and now not all of the items we want - * are on the same leaf. - * - * So we need to adjust how we search for holes, we need to drop the - * path and re-search for the first extent key we found, and then walk - * forward until we hit the last one we copied. - */ - if (need_find_last_extent) { - /* btrfs_prev_leaf could return 1 without releasing the path */ - btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, inode->root, &first_key, - src_path, 0, 0); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = src_path->slots[0]; - } else { - i = start_slot; - } - - /* - * Ok so here we need to go through and fill in any holes we may have - * to make sure that holes are punched for those areas in case they had - * extents previously. 
- */ - while (!done) { - u64 offset, len; - u64 extent_end; - - if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = 0; - need_find_last_extent = true; - } - - btrfs_item_key_to_cpu(src, &key, i); - if (!btrfs_comp_cpu_keys(&key, &last_key)) - done = true; - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) { - i++; - continue; - } - extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(src, extent); - extent_end = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - extent_end = key.offset + len; - } - i++; - - if (*last_extent == key.offset) { - *last_extent = extent_end; - continue; - } - offset = *last_extent; - len = key.offset - *last_extent; - ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), - offset, 0, 0, len, 0, len, 0, 0, 0); - if (ret) - break; - *last_extent = extent_end; - } - - /* - * Check if there is a hole between the last extent found in our leaf - * and the first extent in the next leaf. If there is one, we need to - * log an explicit hole so that at replay time we can punch the hole. - */ - if (ret == 0 && - key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - i == btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - need_find_last_extent = true; - if (ret > 0) { - ret = 0; - } else if (ret == 0) { - btrfs_item_key_to_cpu(src_path->nodes[0], &key, - src_path->slots[0]); - if (key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - *last_extent < key.offset) { - const u64 len = key.offset - *last_extent; - - ret = btrfs_insert_file_extent(trans, log, - btrfs_ino(inode), - *last_extent, 0, - 0, len, 0, len, - 0, 0, 0); - *last_extent += len; - } - } - } - /* - * Need to let the callers know we dropped the path so they should - * re-search. - */ - if (!ret && need_find_last_extent) - ret = 1; return ret; } @@ -4407,7 +4223,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 i_size = i_size_read(&inode->vfs_inode); const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; - u64 last_extent = (u64)-1; + bool dropped_extents = false; int ins_nr = 0; int start_slot; int ret; @@ -4429,8 +4245,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); + start_slot, ins_nr, 1, 0); if (ret < 0) goto out; ins_nr = 0; @@ -4454,8 +4269,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (last_extent == (u64)-1) { - last_extent = key.offset; + if (!dropped_extents) { /* * Avoid logging extent items logged in past fsync calls * and leading to duplicate keys in the log tree. 
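As the surrounding hunks show, the last_extent bookkeeping is replaced by a one-shot flag. A freestanding sketch of the "purge once before the first insertion" pattern being adopted (names invented, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

static void drop_previously_logged_range(void)
{
	puts("purging stale extent items once");
}

int main(void)
{
	bool dropped_extents = false;
	int item;

	for (item = 0; item < 3; item++) {
		/* Purge exactly once, before the first new item is
		 * copied, so no duplicate keys can be inserted. */
		if (!dropped_extents) {
			drop_previously_logged_range();
			dropped_extents = true;
		}
		printf("copying item %d\n", item);
	}
	return 0;
}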
@@ -4469,6 +4283,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } while (ret == -EAGAIN); if (ret) goto out; + dropped_extents = true; } if (ins_nr == 0) start_slot = slot; @@ -4483,7 +4298,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } } if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, start_slot, ins_nr, 1, 0); if (ret > 0) ret = 0; @@ -4670,13 +4485,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (slot >= nritems) { if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; ins_nr = 0; @@ -4700,13 +4510,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, cond_resched(); } if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; } @@ -4715,100 +4520,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, } /* - * If the no holes feature is enabled we need to make sure any hole between the - * last extent and the i_size of our inode is explicitly marked in the log. This - * is to make sure that doing something like: - * - * 1) create file with 128Kb of data - * 2) truncate file to 64Kb - * 3) truncate file to 256Kb - * 4) fsync file - * 5) - * 6) mount fs and trigger log replay - * - * Will give us a file with a size of 256Kb, the first 64Kb of data match what - * the file had in its first 64Kb of data at step 1 and the last 192Kb of the - * file correspond to a hole. The presence of explicit holes in a log tree is - * what guarantees that log replay will remove/adjust file extent items in the - * fs/subvol tree. - * - * Here we do not need to care about holes between extents, that is already done - * by copy_items(). We also only need to do this in the full sync path, where we - * lookup for extents from the fs/subvol tree only. In the fast path case, we - * lookup the list of modified extent maps and if any represents a hole, we - * insert a corresponding extent representing a hole in the log tree. + * When using the NO_HOLES feature if we punched a hole that causes the + * deletion of entire leafs or all the extent items of the first leaf (the one + * that contains the inode item and references) we may end up not processing + * any extents, because there are no leafs with a generation matching the + * current transaction that have extent items for our inode. So we need to find + * if any holes exist and then log them. We also need to log holes after any + * truncate operation that changes the inode's size. 
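A self-contained sketch of the scan this comment describes: walk the extents in offset order, emit a hole whenever the previous extent ends short of the next offset, and finish with the implicit hole up to i_size (userspace analogue with invented types, not the btrfs implementation):

#include <stdio.h>

struct ext {
	unsigned long long off;
	unsigned long long len;
};

static void log_hole(unsigned long long start, unsigned long long len)
{
	printf("hole: start %llu len %llu\n", start, len);
}

int main(void)
{
	struct ext exts[] = { { 4096, 4096 }, { 16384, 4096 } };
	unsigned long long i_size = 32768;
	unsigned long long prev_extent_end = 0;
	int i;

	for (i = 0; i < 2; i++) {
		if (prev_extent_end < exts[i].off)
			log_hole(prev_extent_end,
				 exts[i].off - prev_extent_end);
		prev_extent_end = exts[i].off + exts[i].len;
	}
	/* Implicit trailing hole between the last extent and i_size. */
	if (prev_extent_end < i_size)
		log_hole(prev_extent_end, i_size - prev_extent_end);
	return 0;
}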
*/ -static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_inode *inode, - struct btrfs_path *path) +static int btrfs_log_holes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode *inode, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; - int ret; struct btrfs_key key; - u64 hole_start; - u64 hole_size; - struct extent_buffer *leaf; - struct btrfs_root *log = root->log_root; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); + u64 prev_extent_end = 0; + int ret; - if (!btrfs_fs_incompat(fs_info, NO_HOLES)) + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) return 0; key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = (u64)-1; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - ASSERT(ret != 0); if (ret < 0) return ret; - ASSERT(path->slots[0] > 0); - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { - /* inode does not have any extents */ - hole_start = 0; - hole_size = i_size; - } else { + while (true) { struct btrfs_file_extent_item *extent; + struct extent_buffer *leaf = path->nodes[0]; u64 len; - /* - * If there's an extent beyond i_size, an explicit hole was - * already inserted by copy_items(). - */ - if (key.offset >= i_size) - return 0; + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) + break; + + /* We have a hole, log it. */ + if (prev_extent_end < key.offset) { + const u64 hole_len = key.offset - prev_extent_end; + + /* + * Release the path to avoid deadlocks with other code + * paths that search the root while holding locks on + * leafs from the log root. + */ + btrfs_release_path(path); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, + 0, hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + + /* + * Search for the same key again in the root. Since it's + * an extent item and we are holding the inode lock, the + * key must still exist. If it doesn't just emit warning + * and return an error to fall back to a transaction + * commit. + */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (WARN_ON(ret > 0)) + return -ENOENT; + leaf = path->nodes[0]; + } extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) - return 0; + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_ram_bytes(leaf, extent); + prev_extent_end = ALIGN(key.offset + len, + fs_info->sectorsize); + } else { + len = btrfs_file_extent_num_bytes(leaf, extent); + prev_extent_end = key.offset + len; + } - len = btrfs_file_extent_num_bytes(leaf, extent); - /* Last extent goes beyond i_size, no need to log a hole. */ - if (key.offset + len > i_size) - return 0; - hole_start = key.offset + len; - hole_size = i_size - hole_start; + path->slots[0]++; + cond_resched(); } - btrfs_release_path(path); - /* Last extent ends at i_size. 
*/ - if (hole_size == 0) - return 0; + if (prev_extent_end < i_size) { + u64 hole_len; - hole_size = ALIGN(hole_size, fs_info->sectorsize); - ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, - hole_size, 0, hole_size, 0, 0, 0); - return ret; + btrfs_release_path(path); + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, 0, + hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + } + + return 0; } /* @@ -5011,6 +4835,50 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans, } continue; } + /* + * If the inode was already logged skip it - otherwise we can + * hit an infinite loop. Example: + * + * From the commit root (previous transaction) we have the + * following inodes: + * + * inode 257 a directory + * inode 258 with references "zz" and "zz_link" on inode 257 + * inode 259 with reference "a" on inode 257 + * + * And in the current (uncommitted) transaction we have: + * + * inode 257 a directory, unchanged + * inode 258 with references "a" and "a2" on inode 257 + * inode 259 with reference "zz_link" on inode 257 + * inode 261 with reference "zz" on inode 257 + * + * When logging inode 261 the following infinite loop could + * happen if we don't skip already logged inodes: + * + * - we detect inode 258 as a conflicting inode, with inode 261 + * on reference "zz", and log it; + * + * - we detect inode 259 as a conflicting inode, with inode 258 + * on reference "a", and log it; + * + * - we detect inode 258 as a conflicting inode, with inode 259 + * on reference "zz_link", and log it - again! After this we + * repeat the above steps forever. + */ + spin_lock(&BTRFS_I(inode)->lock); + /* + * Check the inode's logged_trans only instead of + * btrfs_inode_in_log(). This is because the last_log_commit of + * the inode is not updated when we only log that it exists and + * and it has the full sync bit set (see btrfs_log_inode()). + */ + if (BTRFS_I(inode)->logged_trans == trans->transid) { + spin_unlock(&BTRFS_I(inode)->lock); + btrfs_add_delayed_iput(inode); + continue; + } + spin_unlock(&BTRFS_I(inode)->lock); /* * We are safe logging the other inode without acquiring its * lock as long as we log with the LOG_INODE_EXISTS mode. 
We @@ -5110,7 +4978,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_key min_key; struct btrfs_key max_key; struct btrfs_root *log = root->log_root; - u64 last_extent = 0; int err = 0; int ret; int nritems; @@ -5288,7 +5155,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, ins_start_slot = path->slots[0]; } ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { @@ -5311,17 +5178,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } ins_nr = 0; - if (ret) { - btrfs_release_path(path); - continue; - } goto next_slot; } @@ -5334,18 +5197,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, goto next_slot; } - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - if (ret) { - ins_nr = 0; - btrfs_release_path(path); - continue; - } ins_nr = 1; ins_start_slot = path->slots[0]; next_slot: @@ -5359,13 +5217,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } btrfs_release_path(path); @@ -5380,14 +5237,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } } if (ins_nr) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } @@ -5400,7 +5256,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_trailing_hole(trans, root, inode, path); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9b78e720c697..a8b71ded4d21 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -697,17 +697,54 @@ static struct btrfs_fs_devices *find_fsid_changed( /* * Handles the case where scanned device is part of an fs that had * multiple successful changes of FSID but curently device didn't - * observe it. Meaning our fsid will be different than theirs. + * observe it. Meaning our fsid will be different than theirs. We need + * to handle two subcases : + * 1 - The fs still continues to have different METADATA/FSID uuids. + * 2 - The fs is switched back to its original FSID (METADATA/FSID + * are equal). 
*/ list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + /* Changed UUIDs */ if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE) != 0 && memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid, BTRFS_FSID_SIZE) == 0 && memcmp(fs_devices->fsid, disk_super->fsid, - BTRFS_FSID_SIZE) != 0) { + BTRFS_FSID_SIZE) != 0) + return fs_devices; + + /* Unchanged UUIDs */ + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, + BTRFS_FSID_SIZE) == 0 && + memcmp(fs_devices->fsid, disk_super->metadata_uuid, + BTRFS_FSID_SIZE) == 0) + return fs_devices; + } + + return NULL; +} + +static struct btrfs_fs_devices *find_fsid_reverted_metadata( + struct btrfs_super_block *disk_super) +{ + struct btrfs_fs_devices *fs_devices; + + /* + * Handle the case where the scanned device is part of an fs whose last + * metadata UUID change reverted it to the original FSID. At the same + * time fs_devices was first created by another constituent device + * which didn't fully observe the operation. This results in a + * btrfs_fs_devices created with metadata/fsid different AND + * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the + * fs_devices equal to the FSID of the disk. + */ + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid, + BTRFS_FSID_SIZE) != 0 && + memcmp(fs_devices->metadata_uuid, disk_super->fsid, + BTRFS_FSID_SIZE) == 0 && + fs_devices->fsid_change) return fs_devices; - } } return NULL; } @@ -751,7 +788,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid, disk_super->metadata_uuid); } else { - fs_devices = find_fsid(disk_super->fsid, NULL); + fs_devices = find_fsid_reverted_metadata(disk_super); + if (!fs_devices) + fs_devices = find_fsid(disk_super->fsid, NULL); } @@ -781,12 +820,18 @@ static noinline struct btrfs_device *device_list_add(const char *path, * a device which had the CHANGING_FSID_V2 flag then replace the * metadata_uuid/fsid values of the fs_devices.
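The matching rule spelled out above reduces to three byte comparisons. A standalone sketch using fixed-size arrays in place of the real structures (not kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define FSID_SZ 16

struct devs {
	unsigned char fsid[FSID_SZ];
	unsigned char metadata_uuid[FSID_SZ];
	bool fsid_change;
};

/* True when @d was created with split fsid/metadata uuids, is still
 * flagged as mid-change, and its metadata uuid now equals the fsid
 * found on disk - i.e. the fsid change was reverted. */
static bool matches_reverted(const struct devs *d,
			     const unsigned char *disk_fsid)
{
	return memcmp(d->fsid, d->metadata_uuid, FSID_SZ) != 0 &&
	       memcmp(d->metadata_uuid, disk_fsid, FSID_SZ) == 0 &&
	       d->fsid_change;
}

int main(void)
{
	struct devs d = { .fsid = { 1 }, .metadata_uuid = { 2 },
			  .fsid_change = true };
	unsigned char disk_fsid[FSID_SZ] = { 2 };

	printf("reverted-fsid match: %d\n", matches_reverted(&d, disk_fsid));
	return 0;
}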
*/ - if (has_metadata_uuid && fs_devices->fsid_change && + if (fs_devices->fsid_change && found_transid > fs_devices->latest_generation) { memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); - memcpy(fs_devices->metadata_uuid, - disk_super->metadata_uuid, BTRFS_FSID_SIZE); + + if (has_metadata_uuid) + memcpy(fs_devices->metadata_uuid, + disk_super->metadata_uuid, + BTRFS_FSID_SIZE); + else + memcpy(fs_devices->metadata_uuid, + disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->fsid_change = false; } @@ -7331,6 +7376,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, else btrfs_dev_stat_set(dev, i, 0); } + btrfs_info(fs_info, "device stats zeroed by %s (%d)", + current->comm, task_pid_nr(current)); } else { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) if (stats->nr_items > i) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 11929d2bb594..cd09e63d682b 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1418,6 +1418,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_cap_flush *prealloc_cf; ssize_t count, written = 0; int err, want, got; + bool direct_lock = false; loff_t pos; loff_t limit = max(i_size_read(inode), fsc->max_file_size); @@ -1428,8 +1429,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!prealloc_cf) return -ENOMEM; + if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT) + direct_lock = true; + retry_snap: - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_start_io_direct(inode); else ceph_start_io_write(inode); @@ -1519,14 +1523,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* we might need to revert back to that point */ data = *from; - if (iocb->ki_flags & IOCB_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) written = ceph_direct_read_write(iocb, &data, snapc, &prealloc_cf); - ceph_end_io_direct(inode); - } else { + else written = ceph_sync_write(iocb, &data, pos, snapc); + if (direct_lock) + ceph_end_io_direct(inode); + else ceph_end_io_write(inode); - } if (written > 0) iov_iter_advance(from, written); ceph_put_snap_context(snapc); @@ -1577,7 +1582,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out_unlocked; out: - if (iocb->ki_flags & IOCB_DIRECT) + if (direct_lock) ceph_end_io_direct(inode); else ceph_end_io_write(inode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 145d46ba25ae..816d49aed96b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2558,8 +2558,7 @@ static void __do_request(struct ceph_mds_client *mdsc, if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { - err = -ENOENT; - pr_info("probably no mds server is up\n"); + err = -EHOSTUNREACH; goto finish; } } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 29a795f975df..5a708ac9a54c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -1020,10 +1020,6 @@ static int ceph_get_tree(struct fs_context *fc) if (!fc->source) return invalf(fc, "ceph: No source"); -#ifdef CONFIG_CEPH_FS_POSIX_ACL - fc->sb_flags |= SB_POSIXACL; -#endif - /* create client (which we may/may not use) */ fsc = create_fs_client(pctx->opts, pctx->copts); pctx->opts = NULL; @@ -1070,6 +1066,11 @@ static int ceph_get_tree(struct fs_context *fc) return 0; out_splat: + if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { + pr_info("No mds server is up or the cluster is laggy\n"); + err = -EHOSTUNREACH; + } + ceph_mdsc_close_sessions(fsc->mdsc); 
deactivate_locked_super(sb); goto out_final; @@ -1141,6 +1142,10 @@ static int ceph_init_fs_context(struct fs_context *fc) fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; fsopt->congestion_kb = default_congestion_kb(); +#ifdef CONFIG_CEPH_FS_POSIX_ACL + fc->sb_flags |= SB_POSIXACL; +#endif + fc->fs_private = pctx; fc->ops = &ceph_context_ops; return 0; diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 41957b82d796..cc3ada12848d 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -120,17 +120,17 @@ cifs_build_devname(char *nodename, const char *prepath) /** - * cifs_compose_mount_options - creates mount options for refferral + * cifs_compose_mount_options - creates mount options for referral * @sb_mountdata: parent/root DFS mount options (template) * @fullpath: full path in UNC format - * @ref: server's referral + * @ref: optional server's referral * @devname: optional pointer for saving device name * * creates mount options for submount based on template options sb_mountdata * and replacing unc,ip,prefixpath options with ones we've got form ref_unc. * * Returns: pointer to new mount options or ERR_PTR. - * Caller is responcible for freeing retunrned value if it is not error. + * Caller is responsible for freeing returned value if it is not error. */ char *cifs_compose_mount_options(const char *sb_mountdata, const char *fullpath, @@ -150,18 +150,27 @@ char *cifs_compose_mount_options(const char *sb_mountdata, if (sb_mountdata == NULL) return ERR_PTR(-EINVAL); - if (strlen(fullpath) - ref->path_consumed) { - prepath = fullpath + ref->path_consumed; - /* skip initial delimiter */ - if (*prepath == '/' || *prepath == '\\') - prepath++; - } + if (ref) { + if (strlen(fullpath) - ref->path_consumed) { + prepath = fullpath + ref->path_consumed; + /* skip initial delimiter */ + if (*prepath == '/' || *prepath == '\\') + prepath++; + } - name = cifs_build_devname(ref->node_name, prepath); - if (IS_ERR(name)) { - rc = PTR_ERR(name); - name = NULL; - goto compose_mount_options_err; + name = cifs_build_devname(ref->node_name, prepath); + if (IS_ERR(name)) { + rc = PTR_ERR(name); + name = NULL; + goto compose_mount_options_err; + } + } else { + name = cifs_build_devname((char *)fullpath, NULL); + if (IS_ERR(name)) { + rc = PTR_ERR(name); + name = NULL; + goto compose_mount_options_err; + } } rc = dns_resolve_server_name_to_ip(name, &srvIP); @@ -225,6 +234,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, if (devname) *devname = name; + else + kfree(name); /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ @@ -241,23 +252,23 @@ char *cifs_compose_mount_options(const char *sb_mountdata, } /** - * cifs_dfs_do_refmount - mounts specified path using provided refferal + * cifs_dfs_do_mount - mounts specified path using DFS full path + * + * Always pass down @fullpath to smb3_do_mount() so we can use the root server + * to perform failover in case we failed to connect to the first target in the + * referral. 
+ * * @cifs_sb: parent/root superblock * @fullpath: full path in UNC format - * @ref: server's referral */ -static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, - struct cifs_sb_info *cifs_sb, - const char *fullpath, const struct dfs_info3_param *ref) +static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt, + struct cifs_sb_info *cifs_sb, + const char *fullpath) { struct vfsmount *mnt; char *mountdata; char *devname; - /* - * Always pass down the DFS full path to smb3_do_mount() so we - * can use it later for failover. - */ devname = kstrndup(fullpath, strlen(fullpath), GFP_KERNEL); if (!devname) return ERR_PTR(-ENOMEM); @@ -266,7 +277,7 @@ static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, /* strip first '\' from fullpath */ mountdata = cifs_compose_mount_options(cifs_sb->mountdata, - fullpath + 1, ref, NULL); + fullpath + 1, NULL, NULL); if (IS_ERR(mountdata)) { kfree(devname); return (struct vfsmount *)mountdata; @@ -278,28 +289,16 @@ static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, return mnt; } -static void dump_referral(const struct dfs_info3_param *ref) -{ - cifs_dbg(FYI, "DFS: ref path: %s\n", ref->path_name); - cifs_dbg(FYI, "DFS: node path: %s\n", ref->node_name); - cifs_dbg(FYI, "DFS: fl: %d, srv_type: %d\n", - ref->flags, ref->server_type); - cifs_dbg(FYI, "DFS: ref_flags: %d, path_consumed: %d\n", - ref->ref_flag, ref->path_consumed); -} - /* * Create a vfsmount that we can automount */ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) { - struct dfs_info3_param referral = {0}; struct cifs_sb_info *cifs_sb; struct cifs_ses *ses; struct cifs_tcon *tcon; char *full_path, *root_path; unsigned int xid; - int len; int rc; struct vfsmount *mnt; @@ -325,6 +324,8 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) if (full_path == NULL) goto cdda_exit; + convert_delimiter(full_path, '\\'); + cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); if (!cifs_sb_master_tlink(cifs_sb)) { @@ -357,7 +358,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) if (!rc) { rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), full_path + 1, - &referral, NULL); + NULL, NULL); } free_xid(xid); @@ -366,26 +367,16 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) mnt = ERR_PTR(rc); goto free_root_path; } - - dump_referral(&referral); - - len = strlen(referral.node_name); - if (len < 2) { - cifs_dbg(VFS, "%s: Net Address path too short: %s\n", - __func__, referral.node_name); - mnt = ERR_PTR(-EINVAL); - goto free_dfs_ref; - } /* - * cifs_mount() will retry every available node server in case - * of failures. + * OK - we were able to get and cache a referral for @full_path. + * + * Now, pass it down to cifs_mount() and it will retry every available + * node server in case of failures - no need to do it here. 
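With the referral no longer consumed at this layer, the only path preparation left here is delimiter normalisation. A simplified userspace analogue of what convert_delimiter() is used for (the helper below is invented and converts in place):

#include <stdio.h>

/* Rewrite the path separators in place so the full path is UNC
 * style before it is handed down to the mount code. */
static void to_unc_delimiters(char *path)
{
	for (; *path; path++)
		if (*path == '/')
			*path = '\\';
}

int main(void)
{
	char path[] = "/server/share/dir";

	to_unc_delimiters(path);
	printf("%s\n", path);	/* \server\share\dir */
	return 0;
}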
*/ - mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, full_path, &referral); - cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , mnt:%p\n", __func__, - referral.node_name, mnt); + mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path); + cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, + full_path + 1, mnt); -free_dfs_ref: - free_dfs_info_param(&referral); free_root_path: kfree(root_path); free_full_path: diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 96ae72b556ac..25704beb9d4c 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -601,7 +601,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) *pmode |= (S_IXUGO & (*pbits_to_set)); - cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode); + cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode); return; } @@ -630,7 +630,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, if (mode & S_IXUGO) *pace_flags |= SET_FILE_EXEC_RIGHTS; - cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n", + cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n", mode, *pace_flags); return; } @@ -802,6 +802,26 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, return; } +unsigned int setup_authusers_ACE(struct cifs_ace *pntace) +{ + int i; + unsigned int ace_size = 20; + + pntace->type = ACCESS_ALLOWED_ACE_TYPE; + pntace->flags = 0x0; + pntace->access_req = cpu_to_le32(GENERIC_ALL); + pntace->sid.num_subauth = 1; + pntace->sid.revision = 1; + for (i = 0; i < NUM_AUTHS; i++) + pntace->sid.authority[i] = sid_authusers.authority[i]; + + pntace->sid.sub_auth[0] = sid_authusers.sub_auth[0]; + + /* size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth*4) */ + pntace->size = cpu_to_le16(ace_size); + return ace_size; +} + /* * Fill in the special SID based on the mode. 
See * http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5492b9860baa..7659286954d3 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -414,7 +414,7 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses) seq_puts(s, "ntlm"); break; case Kerberos: - seq_printf(s, "krb5,cruid=%u", from_kuid_munged(&init_user_ns,ses->cred_uid)); + seq_puts(s, "krb5"); break; case RawNTLMSSP: seq_puts(s, "ntlmssp"); @@ -427,6 +427,10 @@ cifs_show_security(struct seq_file *s, struct cifs_ses *ses) if (ses->sign) seq_puts(s, "i"); + + if (ses->sectype == Kerberos) + seq_printf(s, ",cruid=%u", + from_kuid_munged(&init_user_ns, ses->cred_uid)); } static void @@ -526,6 +530,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) if (tcon->seal) seq_puts(s, ",seal"); + else if (tcon->ses->server->ignore_signature) + seq_puts(s, ",signloosely"); if (tcon->nocase) seq_puts(s, ",nocase"); if (tcon->local_lease) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 40705e862451..af789aac8ef7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1277,6 +1277,7 @@ struct cifs_fid { __u64 volatile_fid; /* volatile file id for smb2 */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for smb2 */ __u8 create_guid[16]; + __u32 access; struct cifs_pending_open *pending_open; unsigned int epoch; #ifdef CONFIG_CIFS_DEBUG2 @@ -1588,6 +1589,7 @@ struct mid_q_entry { mid_callback_t *callback; /* call completion callback */ mid_handle_t *handle; /* call handle mid callback */ void *callback_data; /* general purpose pointer for callback */ + struct task_struct *creator; void *resp_buf; /* pointer to received SMB header */ unsigned int resp_buf_size; int mid_state; /* wish this were enum but can not pass to wait_event */ @@ -1736,6 +1738,12 @@ static inline bool is_retryable_error(int error) return false; } + +/* cifs_get_writable_file() flags */ +#define FIND_WR_ANY 0 +#define FIND_WR_FSUID_ONLY 1 +#define FIND_WR_WITH_DELETE 2 + #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 9c229408a251..3b583150bcd5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -134,11 +134,12 @@ extern bool backup_cred(struct cifs_sb_info *); extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); -extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); +extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, - bool fsuid_only, + int flags, struct cifsFileInfo **ret_file); extern int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, struct cifsFileInfo **ret_file); extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); extern int cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, @@ -213,6 +214,7 @@ extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *, const struct cifs_fid *, u32 *); extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, const char *, int); +extern unsigned int setup_authusers_ACE(struct cifs_ace *pace); extern unsigned int setup_special_mode_ACE(struct cifs_ace *pace, __u64 nmode); extern void dequeue_mid(struct mid_q_entry *mid, bool malformed); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 
cc86a67225d1..69c38c379f33 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1492,6 +1492,7 @@ CIFS_open(const unsigned int xid, struct cifs_open_parms *oparms, int *oplock, *oplock = rsp->OplockLevel; /* cifs fid stays in le */ oparms->fid->netfid = rsp->Fid; + oparms->fid->access = desired_access; /* Let caller know file was created so we can set the mode. */ /* Do we care about the CreateAction in any other cases? */ @@ -2115,7 +2116,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata) wdata2->tailsz = tailsz; wdata2->bytes = cur_len; - rc = cifs_get_writable_file(CIFS_I(inode), false, + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &wdata2->cfile); if (!wdata2->cfile) { cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n", diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 05ea0e2b7e0e..641825cfa767 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3709,8 +3709,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + old->prepath; + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + new->prepath; if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; @@ -4149,7 +4151,7 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_gid = pvolume_info->linux_gid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; - cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n", + cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); cifs_sb->actimeo = pvolume_info->actimeo; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 2faa05860a48..cf6cec59696c 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1319,7 +1319,7 @@ static struct cifs_ses *find_root_ses(struct dfs_cache_vol_info *vi, char *mdata = NULL, *devname = NULL; struct TCP_Server_Info *server; struct cifs_ses *ses; - struct smb_vol vol; + struct smb_vol vol = {NULL}; rpath = get_dfs_root(path); if (IS_ERR(rpath)) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f3b79012ff29..4f83e48f4df3 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -558,7 +558,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 043288b5c728..dc195435519b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1175,7 +1175,8 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); + rc = wait_event_interruptible(flock->fl_wait, + list_empty(&flock->fl_blocked_member)); if (!rc) goto try_again; locks_delete_block(flock); @@ -1964,7 +1965,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, /* Return -EBADF if no handle is found and general rc otherwise */ int -cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, +cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, struct cifsFileInfo **ret_file) { struct cifsFileInfo *open_file, *inv_file = 
NULL; @@ -1972,7 +1973,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, bool any_available = false; int rc = -EBADF; unsigned int refind = 0; - + bool fsuid_only = flags & FIND_WR_FSUID_ONLY; + bool with_delete = flags & FIND_WR_WITH_DELETE; *ret_file = NULL; /* @@ -2004,6 +2006,8 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, continue; if (fsuid_only && !uid_eq(open_file->uid, current_fsuid())) continue; + if (with_delete && !(open_file->fid.access & DELETE)) + continue; if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { if (!open_file->invalidHandle) { /* found a good writable file */ @@ -2051,12 +2055,12 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only, } struct cifsFileInfo * -find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) +find_writable_file(struct cifsInodeInfo *cifs_inode, int flags) { struct cifsFileInfo *cfile; int rc; - rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile); + rc = cifs_get_writable_file(cifs_inode, flags, &cfile); if (rc) cifs_dbg(FYI, "couldn't find writable handle rc=%d", rc); @@ -2065,6 +2069,7 @@ find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only) int cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, + int flags, struct cifsFileInfo **ret_file) { struct list_head *tmp; @@ -2091,7 +2096,7 @@ cifs_get_writable_path(struct cifs_tcon *tcon, const char *name, kfree(full_path); cinode = CIFS_I(d_inode(cfile->dentry)); spin_unlock(&tcon->open_file_lock); - return cifs_get_writable_file(cinode, 0, ret_file); + return cifs_get_writable_file(cinode, flags, ret_file); } spin_unlock(&tcon->open_file_lock); @@ -2168,7 +2173,8 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) if (mapping->host->i_size - offset < (loff_t)to) to = (unsigned)(mapping->host->i_size - offset); - rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file); + rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY, + &open_file); if (!rc) { bytes_written = cifs_write(open_file, open_file->pid, write_data, to - from, &offset); @@ -2361,7 +2367,7 @@ static int cifs_writepages(struct address_space *mapping, if (cfile) cifsFileInfo_put(cfile); - rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile); + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); /* in case of an error store it to return later */ if (rc) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index ca76a9287456..e9a7536c2a5e 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1649,7 +1649,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct TCP_Server_Info *server; char *full_path; - cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n", + cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n", mode, inode); cifs_sb = CIFS_SB(inode->i_sb); @@ -2074,6 +2074,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2095,15 +2096,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: 
kfree(full_path); free_xid(xid); + return rc; } @@ -2279,7 +2283,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, * writebehind data than the SMB timeout for the SetPathInfo * request would allow */ - open_file = find_writable_file(cifsInode, true); + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); if (open_file) { tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; @@ -2429,7 +2433,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - open_file = find_writable_file(cifsInode, true); + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); if (open_file) { u16 nfid = open_file->fid.netfid; u32 npid = open_file->pid; @@ -2532,7 +2536,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = 0; if (attrs->ia_valid & ATTR_MTIME) { - rc = cifs_get_writable_file(cifsInode, false, &wfile); + rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile); if (!rc) { tcon = tlink_tcon(wfile->tlink); rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d17587c2c4ab..ba9dadf3be24 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -196,7 +196,8 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb) * may look wrong since the inodes may not have timed out by the time * "ls" does a stat() call on them. */ - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) || + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) fattr->cf_flags |= CIFS_FATTR_NEED_REVAL; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL && diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d70a2bb062df..e523c05a4487 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -765,7 +765,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, struct cifs_tcon *tcon; /* if the file is already open for write, just use that fileid */ - open_file = find_writable_file(cinode, true); + open_file = find_writable_file(cinode, FIND_WR_FSUID_ONLY); if (open_file) { fid.netfid = open_file->fid.netfid; netpid = open_file->pid; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 5ef5e97a6d13..bd3669532a09 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -526,7 +526,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, cifs_i = CIFS_I(inode); dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; data.Attributes = cpu_to_le32(dosattrs); - cifs_get_writable_path(tcon, name, &cfile); + cifs_get_writable_path(tcon, name, FIND_WR_ANY, &cfile); tmprc = smb2_compound_op(xid, tcon, cifs_sb, name, FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, ACL_NO_MODE, @@ -582,7 +582,7 @@ smb2_rename_path(const unsigned int xid, struct cifs_tcon *tcon, { struct cifsFileInfo *cfile; - cifs_get_writable_path(tcon, from_name, &cfile); + cifs_get_writable_path(tcon, from_name, FIND_WR_WITH_DELETE, &cfile); return smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, DELETE, SMB2_OP_RENAME, cfile); diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 0516fc482d43..0511aaf451d4 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -743,7 +743,7 @@ __smb2_handle_cancelled_cmd(struct cifs_tcon *tcon, __u16 cmd, __u64 mid, { struct close_cancelled_open *cancelled; - cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); + cancelled = kzalloc(sizeof(*cancelled), GFP_ATOMIC); if (!cancelled) return -ENOMEM; diff --git a/fs/cifs/smb2ops.c 
b/fs/cifs/smb2ops.c index 6250370c1170..05a73af0e8c8 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1115,7 +1115,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, void *data[1]; struct smb2_file_full_ea_info *ea = NULL; struct kvec close_iov[1]; - int rc; + struct smb2_query_info_rsp *rsp; + int rc, used_len = 0; if (smb3_encryption_required(tcon)) flags |= CIFS_TRANSFORM_REQ; @@ -1138,6 +1139,38 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, cifs_sb); if (rc == -ENODATA) goto sea_exit; + } else { + /* If we are adding an attribute we should first check + * if there will be enough space available to store + * the new EA. If not we should not add it since we + * would not be able to even read the EAs back. + */ + rc = smb2_query_info_compound(xid, tcon, utf16_path, + FILE_READ_EA, + FILE_FULL_EA_INFORMATION, + SMB2_O_INFO_FILE, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE, + &rsp_iov[1], &resp_buftype[1], cifs_sb); + if (rc == 0) { + rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base; + used_len = le32_to_cpu(rsp->OutputBufferLength); + } + free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base); + resp_buftype[1] = CIFS_NO_BUFFER; + memset(&rsp_iov[1], 0, sizeof(rsp_iov[1])); + rc = 0; + + /* Use a fudge factor of 256 bytes in case we collide + * with a different set_EAs command. + */ + if (CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE - 256 < + used_len + ea_name_len + ea_value_len + 1) { + rc = -ENOSPC; + goto sea_exit; + } } } @@ -1333,6 +1366,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock) cfile->fid.persistent_fid = fid->persistent_fid; cfile->fid.volatile_fid = fid->volatile_fid; + cfile->fid.access = fid->access; #ifdef CONFIG_CIFS_DEBUG2 cfile->fid.mid = fid->mid; #endif /* CIFS_DEBUG2 */ @@ -1523,7 +1557,9 @@ smb2_ioctl_query_info(const unsigned int xid, COMPOUND_FID, COMPOUND_FID, qi.info_type, true, buffer, qi.output_buffer_length, - CIFSMaxBufSize); + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); } } else if (qi.flags == PASSTHRU_SET_INFO) { /* Can eventually relax perm check since server enforces too */ @@ -2697,7 +2733,10 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_ioctl_init(tcon, &rqst[1], fid.persistent_fid, fid.volatile_fid, FSCTL_GET_REPARSE_POINT, - true /* is_fctl */, NULL, 0, CIFSMaxBufSize); + true /* is_fctl */, NULL, 0, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); if (rc) goto querty_exit; @@ -3187,7 +3226,7 @@ static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs * some servers (Windows2016) will not reflect recent writes in * QUERY_ALLOCATED_RANGES until SMB2_flush is called.
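
The smb2_set_ea() pre-check added above reduces to a single comparison. As a minimal sketch (the helper name and standalone form are hypothetical; only the constants mirror the hunk):

	static bool new_ea_fits(size_t used_len, size_t ea_name_len,
				size_t ea_value_len)
	{
		/* 256-byte fudge factor in case a concurrent set-EA call
		 * grows the list between the query and the write */
		return used_len + ea_name_len + ea_value_len + 1 <=
		       CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE -
		       MAX_SMB2_CLOSE_RESPONSE_SIZE - 256;
	}
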
*/ - wrcfile = find_writable_file(cifsi, false); + wrcfile = find_writable_file(cifsi, FIND_WR_ANY); if (wrcfile) { filemap_write_and_wait(inode->i_mapping); smb2_flush_file(xid, tcon, &wrcfile->fid); @@ -3276,7 +3315,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon, if (rc) goto out; - if (out_data_len < sizeof(struct file_allocated_range_buffer)) { + if (out_data_len && out_data_len < sizeof(struct file_allocated_range_buffer)) { rc = -EINVAL; goto out; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 9434f6dd8df3..fc32fe546c1a 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -312,7 +312,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) if (server->tcpStatus != CifsNeedReconnect) break; - if (--retries) + if (retries && --retries) continue; /* @@ -350,9 +350,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -397,6 +402,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } @@ -2199,13 +2205,14 @@ create_sd_buf(umode_t mode, unsigned int *len) struct cifs_ace *pace; unsigned int sdlen, acelen; - *len = roundup(sizeof(struct crt_sd_ctxt) + sizeof(struct cifs_ace), 8); + *len = roundup(sizeof(struct crt_sd_ctxt) + sizeof(struct cifs_ace) * 2, + 8); buf = kzalloc(*len, GFP_KERNEL); if (buf == NULL) return buf; sdlen = sizeof(struct smb3_sd) + sizeof(struct smb3_acl) + - sizeof(struct cifs_ace); + 2 * sizeof(struct cifs_ace); buf->ccontext.DataOffset = cpu_to_le16(offsetof (struct crt_sd_ctxt, sd)); @@ -2232,8 +2239,12 @@ create_sd_buf(umode_t mode, unsigned int *len) /* create one ACE to hold the mode embedded in reserved special SID */ pace = (struct cifs_ace *)(sizeof(struct crt_sd_ctxt) + (char *)buf); acelen = setup_special_mode_ACE(pace, (__u64)mode); + /* and one more ACE to allow access for authenticated users */ + pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) + + (char *)buf)); + acelen += setup_authusers_ACE(pace); buf->acl.AclSize = cpu_to_le16(sizeof(struct cifs_acl) + acelen); - buf->acl.AceCount = cpu_to_le16(1); + buf->acl.AceCount = cpu_to_le16(2); return buf; } @@ -2738,6 +2749,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, atomic_inc(&tcon->num_remote_opens); oparms->fid->persistent_fid = rsp->PersistentFileId; oparms->fid->volatile_fid = rsp->VolatileFileId; + oparms->fid->access = oparms->desired_access; #ifdef CONFIG_CIFS_DEBUG2 oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId); #endif /* CIFS_DEBUG2 */ @@ -4018,6 +4030,9 @@ smb2_writev_callback(struct mid_q_entry *mid) wdata->cfile->fid.persistent_fid, tcon->tid, tcon->ses->Suid, wdata->offset, wdata->bytes, wdata->result); + if (wdata->result == -ENOSPC) + printk_once(KERN_WARNING "Out of space writing to %s\n", + tcon->treeName); } else trace_smb3_write_done(0 /* no xid */, wdata->cfile->fid.persistent_fid, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 387c88704c52..fe6acfce3390 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -685,6 +685,8 @@ smb2_mid_entry_alloc(const struct 
smb2_sync_hdr *shdr, * The default is for the mid to be synchronous, so the * default callback just wakes up the current task. */ + get_task_struct(current); + temp->creator = current; temp->callback = cifs_wake_up_task; temp->callback_data = current; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3d2e11f85cba..cb3ee916f527 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -76,6 +76,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) * The default is for the mid to be synchronous, so the * default callback just wakes up the current task. */ + get_task_struct(current); + temp->creator = current; temp->callback = cifs_wake_up_task; temp->callback_data = current; @@ -158,6 +160,7 @@ static void _cifs_mid_q_entry_release(struct kref *refcount) } } #endif + put_task_struct(midEntry->creator); mempool_free(midEntry, cifs_mid_poolp); } diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 680aba9c00d5..fd0b5dd68f9e 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -76,14 +76,11 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr) if (ia_valid & ATTR_GID) sd_iattr->ia_gid = iattr->ia_gid; if (ia_valid & ATTR_ATIME) - sd_iattr->ia_atime = timestamp_truncate(iattr->ia_atime, - inode); + sd_iattr->ia_atime = iattr->ia_atime; if (ia_valid & ATTR_MTIME) - sd_iattr->ia_mtime = timestamp_truncate(iattr->ia_mtime, - inode); + sd_iattr->ia_mtime = iattr->ia_mtime; if (ia_valid & ATTR_CTIME) - sd_iattr->ia_ctime = timestamp_truncate(iattr->ia_ctime, - inode); + sd_iattr->ia_ctime = iattr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = iattr->ia_mode; diff --git a/fs/coredump.c b/fs/coredump.c index b1ea7dfbd149..f8296a82d01d 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -517,7 +517,7 @@ static void wait_for_dump_helpers(struct file *file) pipe_lock(pipe); pipe->readers++; pipe->writers--; - wake_up_interruptible_sync(&pipe->wait); + wake_up_interruptible_sync(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); pipe_unlock(pipe); @@ -525,7 +525,7 @@ static void wait_for_dump_helpers(struct file *file) * We actually want wait_event_freezable() but then we need * to clear TIF_SIGPENDING and improve dump_interrupted(). 
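
The AllocMidQEntry() and smb2_mid_entry_alloc() hunks above pin the creating task for the lifetime of the mid: the default callback wakes that task, and a late server response could otherwise wake a task_struct that has already been freed. A condensed sketch of the pattern (illustrative only, error handling omitted):

	/* allocation side: take a reference before publishing the mid */
	get_task_struct(current);
	temp->creator = current;
	temp->callback = cifs_wake_up_task;
	temp->callback_data = current;

	/* final kref put in _cifs_mid_q_entry_release(): drop it last */
	put_task_struct(midEntry->creator);
	mempool_free(midEntry, cifs_mid_poolp);
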
*/ - wait_event_interruptible(pipe->wait, pipe->readers == 1); + wait_event_interruptible(pipe->rd_wait, pipe->readers == 1); pipe_lock(pipe); pipe->readers--; diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 40cca351273f..f09cb4cfae4d 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -666,9 +666,6 @@ static int check_for_busy_inodes(struct super_block *sb, struct list_head *pos; size_t busy_count = 0; unsigned long ino; - struct dentry *dentry; - char _path[256]; - char *path = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); @@ -687,22 +684,14 @@ static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; - dentry = d_find_alias(inode); } spin_unlock(&mk->mk_decrypted_inodes_lock); - if (dentry) { - path = dentry_path(dentry, _path, sizeof(_path)); - dput(dentry); - } - if (IS_ERR_OR_NULL(path)) - path = "(unknown)"; - fscrypt_warn(NULL, - "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", + "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, - ino, path); + ino); return -EBUSY; } diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index f577bb6613f9..de81245c3e45 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -514,6 +514,15 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with diff --git a/fs/dax.c b/fs/dax.c index 1f1f0201cad1..0b0d8819cb1b 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1207,6 +1207,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index dede25247b81..18eeeb093a68 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -142,18 +142,21 @@ EXPORT_SYMBOL_GPL(debugfs_file_put); * We also need to exclude any file that has ways to write or alter it as root * can bypass the permissions check. 
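
Condensed, the debugfs open check reworked in the following hunks amounts to this (a sketch of the rule, not the patch itself; proxy fops plumbing omitted):

	if ((inode->i_mode & 07777) == 0444 &&	/* world-readable, */
	    !(filp->f_mode & FMODE_WRITE) &&	/* opened read-only, */
	    !real_fops->unlocked_ioctl &&
	    !real_fops->compat_ioctl &&
	    !real_fops->mmap)			/* and no side channels */
		return 0;	/* harmless even when locked down */
	if (security_locked_down(LOCKDOWN_DEBUGFS))
		return -EPERM;
	return 0;
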
*/ -static bool debugfs_is_locked_down(struct inode *inode, - struct file *filp, - const struct file_operations *real_fops) +static int debugfs_locked_down(struct inode *inode, + struct file *filp, + const struct file_operations *real_fops) { if ((inode->i_mode & 07777) == 0444 && !(filp->f_mode & FMODE_WRITE) && !real_fops->unlocked_ioctl && !real_fops->compat_ioctl && !real_fops->mmap) - return false; + return 0; - return security_locked_down(LOCKDOWN_DEBUGFS); + if (security_locked_down(LOCKDOWN_DEBUGFS)) + return -EPERM; + + return 0; } static int open_proxy_open(struct inode *inode, struct file *filp) @@ -168,7 +171,7 @@ static int open_proxy_open(struct inode *inode, struct file *filp) real_fops = debugfs_real_fops(filp); - r = debugfs_is_locked_down(inode, filp, real_fops); + r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; @@ -298,7 +301,7 @@ static int full_proxy_open(struct inode *inode, struct file *filp) real_fops = debugfs_real_fops(filp); - r = debugfs_is_locked_down(inode, filp, real_fops); + r = debugfs_locked_down(inode, filp, real_fops); if (r) goto out; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index f91db24bbf3b..a064b408d841 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -311,8 +311,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); + if (!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + return -EINVAL; + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 216fbe6a4837..4dc09638de8f 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1304,7 +1304,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_WARNING "Tag 1 packet contains key larger " "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n"); rc = -EINVAL; - goto out; + goto out_free; } memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index d668e60b85b5..c05ca39aa449 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -379,6 +379,7 @@ int __init ecryptfs_init_messaging(void) * ecryptfs_message_buf_len), GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { + kfree(ecryptfs_daemon_hash); rc = -ENOMEM; goto out; } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 2890a67a1ded..5779a15c2cd6 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -306,24 +306,22 @@ static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, } src = kmap_atomic(*rq->in); - if (!rq->out[0]) { - dst = NULL; - } else { + if (rq->out[0]) { dst = kmap_atomic(rq->out[0]); memcpy(dst + rq->pageofs_out, src, righthalf); + kunmap_atomic(dst); } - if (rq->out[1] == *rq->in) { - memmove(src, src + righthalf, rq->pageofs_out); - } else if (nrpages_out == 2) { - if (dst) - kunmap_atomic(dst); + if (nrpages_out == 2) { DBG_BUGON(!rq->out[1]); - dst = kmap_atomic(rq->out[1]); - memcpy(dst, src + righthalf, rq->pageofs_out); + if (rq->out[1] == *rq->in) { + memmove(src, src + righthalf, rq->pageofs_out); + } else { + dst = kmap_atomic(rq->out[1]); + memcpy(dst, src + righthalf, rq->pageofs_out); + kunmap_atomic(dst); + } } - if (dst) - kunmap_atomic(dst); 
kunmap_atomic(src); return 0; } diff --git a/fs/eventfd.c b/fs/eventfd.c index 8aa0ea8c55e8..78e41c7c3d05 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -24,6 +24,8 @@ #include #include +DEFINE_PER_CPU(int, eventfd_wake_count); + static DEFINE_IDA(eventfd_ida); struct eventfd_ctx { @@ -60,12 +62,25 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n) { unsigned long flags; + /* + * Deadlock or stack overflow issues can happen if we recurse here + * through waitqueue wakeup handlers. If the caller uses potentially + * nested waitqueues with custom wakeup handlers, then it should + * check eventfd_signal_count() before calling this function. If + * it returns true, the eventfd_signal() call should be deferred to a + * safe context. + */ + if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count))) + return 0; + spin_lock_irqsave(&ctx->wqh.lock, flags); + this_cpu_inc(eventfd_wake_count); if (ULLONG_MAX - ctx->count < n) n = ULLONG_MAX - ctx->count; ctx->count += n; if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, EPOLLIN); + this_cpu_dec(eventfd_wake_count); spin_unlock_irqrestore(&ctx->wqh.lock, flags); return n; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 67a395039268..deebb47b6333 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1860,9 +1860,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, waiter = true; init_waitqueue_entry(&wait, current); - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __add_wait_queue_exclusive(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } for (;;) { @@ -1910,9 +1910,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, goto fetch_events; if (waiter) { - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __remove_wait_queue(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } return res; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index bcffe25da2f0..0e76db275e23 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1073,9 +1073,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext2; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) - 1) + / EXT2_BLOCKS_PER_GROUP(sb)) + 1; db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc_array (db_count, diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 0b202e00d93f..5aba67a504cf 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -270,6 +270,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -280,7 +281,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked; this is ok since + * the pointer being dereferenced won't be dereferenced again.
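
The sbi_array_rcu_deref() helper used here (defined in the ext4.h hunk further down) expands to roughly the following; a sketch, not the literal macro:

	rcu_read_lock();
	bh_p = ((struct buffer_head **)
		rcu_dereference(sbi->s_group_desc))[group_desc];
	rcu_read_unlock();
	/* Online resize only swaps the outer array (freeing the old one
	 * via ext4_kvfree_array_rcu()); the element itself stays valid,
	 * so bh_p may be used after rcu_read_unlock(). */
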
By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. + */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -288,10 +296,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 1ee04e76bbe0..0a734ffb4310 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -207,6 +207,7 @@ static int ext4_protect_reserved_inode(struct super_block *sb, return PTR_ERR(inode); num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; while (i < num) { + cond_resched(); map.m_lblk = i; map.m_len = num - i; n = ext4_map_blocks(NULL, inode, &map, 0); diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9f00fc0bf21d..4dc2615ab289 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -129,12 +129,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? */ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { @@ -672,9 +674,11 @@ static int ext4_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { + if (!inode || !IS_CASEFOLDED(inode) || + !EXT4_SB(inode->i_sb)->s_encoding) { if (len != name->len) return -1; return memcmp(str, name->name, len); @@ -687,10 +691,11 @@ static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) { const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); unsigned char *norm; int len, ret = 0; - if (!IS_CASEFOLDED(dentry->d_inode) || !um) + if (!inode || !IS_CASEFOLDED(inode) || !um) return 0; norm = kmalloc(PATH_MAX, GFP_ATOMIC); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f8578caba40d..7a14e553d58f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1401,7 +1401,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1463,7 +1463,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned 
short *s_mb_offsets; @@ -1513,7 +1513,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ @@ -1553,8 +1553,11 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ - struct percpu_rw_semaphore s_journal_flag_rwsem; + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. + */ + struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; @@ -1574,6 +1577,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + /* * Inode dynamic state flags */ @@ -2482,8 +2502,11 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... */ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK @@ -2666,6 +2689,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, @@ -2913,13 +2937,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* @@ -2969,7 +2993,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6a7293a5cda2..977ac58dc718 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -88,9 +88,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock_shared(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if 
(iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) return -EAGAIN; + } else { inode_lock_shared(inode); } /* @@ -487,9 +488,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) bool extend = false; struct inode *inode = file_inode(iocb->ki_filp); - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8ca4a23129aa..7db0c8814f2e 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -325,11 +325,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); @@ -365,12 +367,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -1051,7 +1054,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1074,7 +1078,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 629a25d999f0..74a941e920cf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2466,7 +2466,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, * truncate are avoided by checking i_size under i_data_sem. 
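
This lockless read works because updates to i_disksize are now published with WRITE_ONCE() under i_data_sem (see the ext4_update_i_disksize() hunk earlier), pairing with the READ_ONCE() below. In sketch form:

	/* writer, under i_data_sem */
	WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize);

	/* lockless reader in writeback */
	if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
		/* take i_data_sem and re-check before updating */
	}
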
*/ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; @@ -2627,7 +2627,7 @@ static int ext4_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); /* @@ -2848,7 +2848,7 @@ static int ext4_writepages(struct address_space *mapping, out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -2863,13 +2863,13 @@ static int ext4_dax_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc); trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -4615,6 +4615,18 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = -EFSCORRUPTED; goto bad_inode; } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. + */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + ext4_error_inode(inode, function, line, 0, + "iget: Dir with htree data on filesystem without dir_index feature."); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -5818,7 +5830,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -5835,7 +5847,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); return err; } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); @@ -5843,7 +5855,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a3e2767bdf2f..c76ffc259d19 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2356,7 +2356,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2369,13 +2369,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + 
rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2387,6 +2390,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2405,12 +2409,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2458,8 +2462,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2472,6 +2481,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2507,11 +2517,16 @@ static int ext4_mb_init_backend(struct super_block *sb) while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2700,7 +2715,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2719,9 +2734,12 @@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + 
rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); @@ -3020,7 +3038,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4914,7 +4933,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } /* @@ -5071,7 +5091,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(clusters_freed, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 89725fa42573..fb6520f37135 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -407,6 +407,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -431,6 +432,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. We do need @@ -441,7 +444,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -452,7 +455,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -494,7 +497,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -576,10 +579,11 @@ int ext4_ext_migrate(struct inode *inode) ext4_ext_tree_init(handle, tmp_inode); out_stop: ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -589,7 +593,8 @@ int ext4_ext_migrate(struct inode *inode) int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -613,9 +618,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -650,5 +659,7 @@ int 
ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 2305b4374fd3..9d00e0dd2ba9 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, - "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s", - (long long unsigned int) le64_to_cpu(mmp->mmp_time), - mmp->mmp_nodename, mmp->mmp_bdevname); + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", + (unsigned long long)le64_to_cpu(mmp->mmp_time), + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* @@ -154,6 +154,7 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, @@ -375,7 +376,8 @@ int ext4_multi_mount_protect(struct super_block *sb, /* * Start a kernel thread to update the MMP block periodically. */ - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s", + (int)sizeof(mmp->mmp_bdevname), bdevname(bh->b_bdev, mmp->mmp_bdevname)); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1cb42d940784..ee615a93af6e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1507,6 +1507,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; @@ -2207,6 +2208,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 24aeedb8fc75..68b39e75446a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -512,17 +512,26 @@ int ext4_bio_write_page(struct ext4_io_submit *io, gfp_t gfp_flags = GFP_NOFS; unsigned int enc_bytes = round_up(len, i_blocksize(inode)); + /* + * Since bounce page allocation uses a mempool, we can only use + * a waiting mask (i.e. request guaranteed allocation) on the + * first page of the bio. Otherwise it can deadlock. 
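
The allocation policy in this ext4_bio_write_page() hunk, in sketch form (the ternary mirrors the patch; on -ENOMEM the pending bio is flushed to refill the mempool, and __GFP_NOFAIL is used only when no bio is queued and writeback is WB_SYNC_ALL):

	gfp_t gfp_flags = io->io_bio ? GFP_NOWAIT | __GFP_NOWARN /* later pages */
				     : GFP_NOFS;	/* first page of the bio */
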
+ */ + if (io->io_bio) + gfp_flags = GFP_NOWAIT | __GFP_NOWARN; retry_encrypt: bounce_page = fscrypt_encrypt_pagecache_blocks(page, enc_bytes, 0, gfp_flags); if (IS_ERR(bounce_page)) { ret = PTR_ERR(bounce_page); - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { - if (io->io_bio) { + if (ret == -ENOMEM && + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { + gfp_flags = GFP_NOFS; + if (io->io_bio) ext4_io_submit(io); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } - gfp_flags |= __GFP_NOFAIL; + else + gfp_flags |= __GFP_NOFAIL; + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry_encrypt; } diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index fef7755300c3..410c904cf59b 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -57,6 +57,7 @@ enum bio_post_read_step { STEP_INITIAL = 0, STEP_DECRYPT, STEP_VERITY, + STEP_MAX, }; struct bio_post_read_ctx { @@ -106,10 +107,22 @@ static void verity_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; - fsverity_verify_bio(ctx->bio); + /* + * fsverity_verify_bio() may call readpages() again, and although verity + * will be disabled for that, decryption may still be needed, causing + * another bio_post_read_ctx to be allocated. So to guarantee that + * mempool_alloc() never deadlocks we must free the current ctx first. + * This is safe because verity is the last post-read step. + */ + BUILD_BUG_ON(STEP_VERITY + 1 != STEP_MAX); + mempool_free(ctx, bio_post_read_ctx_pool); + bio->bi_private = NULL; - bio_post_read_processing(ctx); + fsverity_verify_bio(bio); + + __read_end_io(bio); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index a8c0f2b5b6e1..f178af1dffe0 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -17,6 +17,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -542,8 +569,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -861,13 +888,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -911,9 +940,11 @@ static int add_new_gdb_meta_bg(struct 
super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); @@ -924,9 +955,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1190,7 +1221,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1272,7 +1304,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). */ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1400,11 +1432,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* @@ -1499,7 +1534,8 @@ static int ext4_flex_group_add(struct super_block *sb, for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2937a8873fe1..71e2b80ff4aa 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -970,6 +970,8 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -1000,15 +1002,23 @@ static void ext4_put_super(struct super_block *sb) if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); - kvfree(sbi->s_flex_groups); + brelse(group_desc[i]); + kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - 
percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(get_qf_name(sb, sbi, i)); @@ -2335,8 +2345,8 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2345,22 +2355,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2368,6 +2393,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2385,12 +2411,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -2964,17 +2989,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && !readonly) { +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) + if (!readonly && (ext4_has_feature_quota(sb) || + ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return 0; - } - if (ext4_has_feature_project(sb) && !readonly) { - ext4_msg(sb, KERN_ERR, - 
"Filesystem with project quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0; } #endif /* CONFIG_QUOTA */ @@ -3595,9 +3614,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3768,6 +3788,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; @@ -3785,6 +3814,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_msg(sb, KERN_ERR, "unsupported inode size: %d", sbi->s_inode_size); + ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); goto failed_mount; } /* @@ -3988,14 +4018,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } if (le32_to_cpu(es->s_log_block_size) > (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, @@ -4249,9 +4271,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = kvmalloc_array(db_count, - sizeof(struct buffer_head *), - GFP_KERNEL); + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, + sizeof(struct buffer_head *), + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -4267,14 +4290,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4553,7 +4581,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4641,13 +4669,19 @@ 
static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); @@ -4676,9 +4710,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); @@ -5540,9 +5577,15 @@ static int ext4_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ? - dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = 0; + if (dquot->dq_dqb.dqb_bsoftlimit && + (!limit || dquot->dq_dqb.dqb_bsoftlimit < limit)) + limit = dquot->dq_dqb.dqb_bsoftlimit; + if (dquot->dq_dqb.dqb_bhardlimit && + (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) + limit = dquot->dq_dqb.dqb_bhardlimit; + limit >>= sb->s_blocksize_bits; + if (limit && buf->f_blocks > limit) { curblock = (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; @@ -5552,9 +5595,14 @@ static int ext4_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? 
- dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = 0; + if (dquot->dq_dqb.dqb_isoftlimit && + (!limit || dquot->dq_dqb.dqb_isoftlimit < limit)) + limit = dquot->dq_dqb.dqb_isoftlimit; + if (dquot->dq_dqb.dqb_ihardlimit && + (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) + limit = dquot->dq_dqb.dqb_ihardlimit; + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a034cd0ce021..930706e171fd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1180,19 +1180,6 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) int err = 0; bool direct_io = iocb->ki_flags & IOCB_DIRECT; - /* convert inline data for Direct I/O*/ - if (direct_io) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - - if (direct_io && allow_outplace_dio(inode, iocb, from)) - return 0; - - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) - return 0; - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); if (map.m_len > map.m_lblk) @@ -3145,7 +3132,8 @@ int f2fs_migrate_page(struct address_space *mapping, #ifdef CONFIG_SWAP /* Copied from generic_swapfile_activate() to check any holes */ -static int check_swap_activate(struct file *swap_file, unsigned int max) +static int check_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; @@ -3156,6 +3144,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) sector_t last_block; sector_t lowest_block = -1; sector_t highest_block = 0; + int nr_extents = 0; + int ret; blkbits = inode->i_blkbits; blocks_per_page = PAGE_SIZE >> blkbits; @@ -3167,7 +3157,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) probe_block = 0; page_no = 0; last_block = i_size_read(inode) >> blkbits; - while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + while ((probe_block + blocks_per_page) <= last_block && + page_no < sis->max) { unsigned block_in_page; sector_t first_block; @@ -3207,13 +3198,27 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) highest_block = first_block; } + /* + * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks + */ + ret = add_swap_extent(sis, page_no, 1, first_block); + if (ret < 0) + goto out; + nr_extents += ret; page_no++; probe_block += blocks_per_page; reprobe: continue; } - return 0; - + ret = nr_extents; + *span = 1 + highest_block - lowest_block; + if (page_no == 0) + page_no = 1; /* force Empty message */ + sis->max = page_no; + sis->pages = page_no - 1; + sis->highest_bit = page_no - 1; +out: + return ret; bad_bmap: pr_err("swapon: swapfile has holes\n"); return -EINVAL; @@ -3235,14 +3240,14 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret) return ret; - ret = check_swap_activate(file, sis->max); - if (ret) + ret = check_swap_activate(sis, file, span); + if (ret < 0) return ret; set_inode_flag(inode, FI_PIN_FILE); f2fs_precache_extents(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return 0; + return ret; } static void f2fs_swap_deactivate(struct file *file) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c967cacf979e..93b6572cc6bd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -1069,24 +1069,27 @@ static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, const char *str, const struct 
qstr *name) { struct qstr qstr = {.name = str, .len = len }; + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); - if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { + if (!inode || !IS_CASEFOLDED(inode)) { if (len != name->len) return -1; - return memcmp(str, name, len); + return memcmp(str, name->name, len); } - return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); + return f2fs_ci_compare(inode, name, &qstr, false); } static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); const struct unicode_map *um = sbi->s_encoding; + const struct inode *inode = READ_ONCE(dentry->d_inode); unsigned char *norm; int len, ret = 0; - if (!IS_CASEFOLDED(dentry->d_inode)) + if (!inode || !IS_CASEFOLDED(inode)) return 0; norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); diff --git a/fs/f2fs/file.c index 85af112e868d..6c4436a5ce79 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -50,7 +50,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn = { .node_changed = false }; + struct dnode_of_data dn; int err; if (unlikely(f2fs_cp_error(sbi))) { @@ -63,6 +63,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto err; } + /* should be done outside of any locked page */ + f2fs_balance_fs(sbi, true); + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -120,8 +123,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); - f2fs_balance_fs(sbi, dn.node_changed); - sb_end_pagefault(inode->i_sb); err: return block_page_mkwrite_return(err); @@ -754,18 +755,12 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) inode->i_uid = attr->ia_uid; if (ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) { - inode->i_atime = timestamp_truncate(attr->ia_atime, - inode); - } - if (ia_valid & ATTR_MTIME) { - inode->i_mtime = timestamp_truncate(attr->ia_mtime, - inode); - } - if (ia_valid & ATTR_CTIME) { - inode->i_ctime = timestamp_truncate(attr->ia_ctime, - inode); - } + if (ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; @@ -3389,18 +3384,41 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EAGAIN; goto out; } - } else { - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); + goto write; + } - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = err; - goto out; - } + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + goto write; + + if (iocb->ki_flags & IOCB_DIRECT) { + /* + * Convert inline data for Direct I/O before entering + * f2fs_direct_IO(). + */ + err = f2fs_convert_inline_inode(inode); + if (err) + goto out_err; + /* + * If f2fs_force_buffered_io() is true, we have to allocate + * blocks all the time, since f2fs_direct_IO will fall + * back to buffered IO. 
+ */ + if (!f2fs_force_buffered_io(inode, iocb, from) && + allow_outplace_dio(inode, iocb, from)) + goto write; + } + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { +out_err: + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = err; + goto out; } +write: ret = __generic_file_write_iter(iocb, from); clear_inode_flag(inode, FI_NO_PREALLOC); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a1c507b0b4ac..5d9584281935 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -797,6 +797,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (whiteout) { f2fs_i_links_write(inode, false); + inode->i_state |= I_LINKABLE; *whiteout = inode; } else { d_tmpfile(dentry, inode); @@ -867,6 +868,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; + if (flags & RENAME_WHITEOUT) { + err = f2fs_create_whiteout(old_dir, &whiteout); + if (err) + return err; + } + err = dquot_initialize(old_dir); if (err) goto out; @@ -898,17 +905,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); - if (err) - goto out_dir; - } - if (new_inode) { err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) - goto out_whiteout; + goto out_dir; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, @@ -916,7 +917,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) { if (IS_ERR(new_page)) err = PTR_ERR(new_page); - goto out_whiteout; + goto out_dir; } f2fs_balance_fs(sbi, true); @@ -948,7 +949,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = f2fs_add_link(new_dentry, old_inode); if (err) { f2fs_unlock_op(sbi); - goto out_whiteout; + goto out_dir; } if (old_dir_entry) @@ -972,7 +973,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_ERR(old_page)) err = PTR_ERR(old_page); f2fs_unlock_op(sbi); - goto out_whiteout; + goto out_dir; } } } @@ -991,7 +992,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (whiteout) { - whiteout->i_state |= I_LINKABLE; set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) @@ -1027,15 +1027,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_unlock_op(sbi); if (new_page) f2fs_put_page(new_page, 0); -out_whiteout: - if (whiteout) - iput(whiteout); out_dir: if (old_dir_entry) f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); out: + if (whiteout) + iput(whiteout); return err; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5111e1ffe58a..ac01c3f8863d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1213,12 +1213,10 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = 0; - if (dquot->dq_dqb.dqb_bsoftlimit) - limit = dquot->dq_dqb.dqb_bsoftlimit; - if (dquot->dq_dqb.dqb_bhardlimit && - (!limit || dquot->dq_dqb.dqb_bhardlimit < limit)) - limit = dquot->dq_dqb.dqb_bhardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); + if (limit) + limit >>= sb->s_blocksize_bits; if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; @@ 
-1228,12 +1226,8 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = 0; - if (dquot->dq_dqb.dqb_isoftlimit) - limit = dquot->dq_dqb.dqb_isoftlimit; - if (dquot->dq_dqb.dqb_ihardlimit && - (!limit || dquot->dq_dqb.dqb_ihardlimit < limit)) - limit = dquot->dq_dqb.dqb_ihardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); if (limit && buf->f_files > limit) { buf->f_files = limit; diff --git a/fs/f2fs/sysfs.c index 70945ceb9c0c..e79c86b8553a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -733,10 +733,12 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); - else + } else { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + } return ret; } @@ -757,8 +759,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -786,4 +791,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); } diff --git a/fs/fat/inode.c index 5f04c5c810fb..d40cbad16659 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -749,6 +749,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zeroing to allow iput() even on a partially initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1373,16 +1380,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. 
*/ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1843,13 +1840,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 335607b8c5c0..76ac9c7d32ec 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2063,7 +2063,7 @@ void wb_workfn(struct work_struct *work) struct bdi_writeback, dwork); long pages_written; - set_worker_desc("flush-%s", dev_name(wb->bdi->dev)); + set_worker_desc("flush-%s", bdi_dev_name(wb->bdi)); current->flags |= PF_SWAPWRITE; if (likely(!current_is_workqueue_rescuer() || diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8e02d76fe104..97eec7522bf2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,12 +276,10 @@ static void flush_bg_queue(struct fuse_conn *fc) void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; - bool async; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; - async = req->args->end; /* * test_and_set_bit() implies smp_mb() between bit * changing and below intr_entry check. Pairs with @@ -324,7 +322,7 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); } - if (async) + if (test_bit(FR_ASYNC, &req->flags)) req->args->end(fc, req->args, req->out.h.error); put_request: fuse_put_request(fc, req); @@ -471,6 +469,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; req->args = args; + if (args->end) + __set_bit(FR_ASYNC, &req->flags); } ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ce715380143c..3dd37a998ea9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -803,6 +803,10 @@ static int fuse_do_readpage(struct file *file, struct page *page) attr_ver = fuse_get_attr_version(fc); + /* Don't overflow end offset */ + if (pos + (desc.length - 1) == LLONG_MAX) + desc.length--; + fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); res = fuse_simple_request(fc, &ia.ap.args); if (res < 0) @@ -888,6 +892,14 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) ap->args.out_pages = true; ap->args.page_zeroing = true; ap->args.page_replace = true; + + /* Don't overflow end offset */ + if (pos + (count - 1) == LLONG_MAX) { + count--; + ap->descs[ap->num_pages - 1].length--; + } + WARN_ON((loff_t) (pos + count) < 0); + fuse_read_args_fill(ia, file, pos, count, FUSE_READ); ia->read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { @@ -1465,6 +1477,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, } ia = NULL; if (nres < 0) { + iov_iter_revert(iter, nbytes); err = nres; break; } @@ -1473,8 +1486,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, count -= nres; res += nres; pos += nres; - if (nres != nbytes) + if (nres != nbytes) { + iov_iter_revert(iter, nbytes - nres); break; 
+ } if (count) { max_pages = iov_iter_npages(iter, fc->max_pages); ia = fuse_io_alloc(io, max_pages); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aa75e2305b75..ca344bf71404 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -301,6 +301,7 @@ struct fuse_io_priv { * FR_SENT: request is in userspace, waiting for an answer * FR_FINISHED: request is finished * FR_PRIVATE: request is on private list + * FR_ASYNC: request is asynchronous */ enum fuse_req_flag { FR_ISREPLY, @@ -314,6 +315,7 @@ enum fuse_req_flag { FR_SENT, FR_FINISHED, FR_PRIVATE, + FR_ASYNC, }; /** diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9d58295ccf7a..cb26be6f4351 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -847,7 +847,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct gfs2_inode *ip = GFS2_I(inode); - ssize_t written = 0, ret; + ssize_t ret; ret = gfs2_rsqa_alloc(ip); if (ret) @@ -867,68 +867,58 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret <= 0) - goto out; - - /* We can write back this queue in page reclaim */ - current->backing_dev_info = inode_to_bdi(inode); + goto out_unlock; ret = file_remove_privs(file); if (ret) - goto out2; + goto out_unlock; ret = file_update_time(file); if (ret) - goto out2; + goto out_unlock; if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; - loff_t pos, endbyte; - ssize_t buffered; + ssize_t buffered, ret2; - written = gfs2_file_direct_write(iocb, from); - if (written < 0 || !iov_iter_count(from)) - goto out2; + ret = gfs2_file_direct_write(iocb, from); + if (ret < 0 || !iov_iter_count(from)) + goto out_unlock; - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - if (unlikely(ret < 0)) - goto out2; - buffered = ret; + iocb->ki_flags |= IOCB_DSYNC; + current->backing_dev_info = inode_to_bdi(inode); + buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + current->backing_dev_info = NULL; + if (unlikely(buffered <= 0)) + goto out_unlock; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT - * semantics. + * semantics. If the writeback or invalidate fails, only report + * the direct I/O range as we don't know if the buffered pages + * made it to disk. 
*/ - pos = iocb->ki_pos; - endbyte = pos + buffered - 1; - ret = filemap_write_and_wait_range(mapping, pos, endbyte); - if (!ret) { - iocb->ki_pos += buffered; - written += buffered; - invalidate_mapping_pages(mapping, - pos >> PAGE_SHIFT, - endbyte >> PAGE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } + iocb->ki_pos += buffered; + ret2 = generic_write_sync(iocb, buffered); + invalidate_mapping_pages(mapping, + (iocb->ki_pos - buffered) >> PAGE_SHIFT, + (iocb->ki_pos - 1) >> PAGE_SHIFT); + if (!ret || ret2 > 0) + ret += ret2; } else { + current->backing_dev_info = inode_to_bdi(inode); ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - if (likely(ret > 0)) + current->backing_dev_info = NULL; + if (likely(ret > 0)) { iocb->ki_pos += ret; + ret = generic_write_sync(iocb, ret); + } } -out2: - current->backing_dev_info = NULL; -out: +out_unlock: inode_unlock(inode); - if (likely(ret > 0)) { - /* Handle various SYNC-type writes */ - ret = generic_write_sync(iocb, ret); - } - return written ? written : ret; + return ret; } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index dafef10b91f1..a8e77926bbc4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1248,7 +1248,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(file->f_mode & FMODE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? -EEXIST : 0; } BUG_ON(d != NULL); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 55fed7daf2b1..b5cf50719006 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -259,7 +259,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, struct super_block *sb = sdp->sd_vfs; struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); - bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9); + bio->bi_iter.bi_sector = blkno << (sb->s_blocksize_bits - 9); bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = end_io; bio->bi_private = sdp; @@ -422,7 +422,7 @@ static bool gfs2_jhead_pg_srch(struct gfs2_jdesc *jd, for (offset = 0; offset < PAGE_SIZE; offset += sdp->sd_sb.sb_bsize) { if (!__get_log_header(sdp, kaddr + offset, 0, &lh)) { - if (lh.lh_sequence > head->lh_sequence) + if (lh.lh_sequence >= head->lh_sequence) *head = lh; else { ret = true; @@ -472,6 +472,20 @@ static void gfs2_jhead_process_page(struct gfs2_jdesc *jd, unsigned long index, put_page(page); /* Once more for find_or_create_page */ } +static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) +{ + struct bio *new; + + new = bio_alloc(GFP_NOIO, nr_iovecs); + bio_copy_dev(new, prev); + new->bi_iter.bi_sector = bio_end_sector(prev); + new->bi_opf = prev->bi_opf; + new->bi_write_hint = prev->bi_write_hint; + bio_chain(new, prev); + submit_bio(prev); + return new; +} + /** * gfs2_find_jhead - find the head of a log * @jd: The journal descriptor @@ -488,15 +502,15 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct address_space *mapping = jd->jd_inode->i_mapping; unsigned int block = 0, blocks_submitted = 0, blocks_read = 0; - unsigned int bsize = sdp->sd_sb.sb_bsize; + unsigned int bsize = sdp->sd_sb.sb_bsize, off; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int shift = PAGE_SHIFT - bsize_shift; - unsigned int readhead_blocks = BIO_MAX_PAGES << shift; + unsigned int 
readahead_blocks = BIO_MAX_PAGES << shift; struct gfs2_journal_extent *je; int sz, ret = 0; struct bio *bio = NULL; struct page *page = NULL; - bool done = false; + bool bio_chained = false, done = false; errseq_t since; memset(head, 0, sizeof(*head)); @@ -505,9 +519,9 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, since = filemap_sample_wb_err(mapping); list_for_each_entry(je, &jd->extent_list, list) { - for (; block < je->lblock + je->blocks; block++) { - u64 dblock; + u64 dblock = je->dblock; + for (; block < je->lblock + je->blocks; block++, dblock++) { if (!page) { page = find_or_create_page(mapping, block >> shift, GFP_NOFS); @@ -516,35 +530,41 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, done = true; goto out; } + off = 0; } - if (bio) { - unsigned int off; - - off = (block << bsize_shift) & ~PAGE_MASK; + if (!bio || (bio_chained && !off)) { + /* start new bio */ + } else { sz = bio_add_page(bio, page, bsize, off); - if (sz == bsize) { /* block added */ - if (off + bsize == PAGE_SIZE) { - page = NULL; - goto page_added; - } - continue; + if (sz == bsize) + goto block_added; + if (off) { + unsigned int blocks = + (PAGE_SIZE - off) >> bsize_shift; + + bio = gfs2_chain_bio(bio, blocks); + bio_chained = true; + goto add_block_to_new_bio; } + } + + if (bio) { blocks_submitted = block + 1; submit_bio(bio); - bio = NULL; } - dblock = je->dblock + (block - je->lblock); bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read); bio->bi_opf = REQ_OP_READ; - sz = bio_add_page(bio, page, bsize, 0); - gfs2_assert_warn(sdp, sz == bsize); - if (bsize == PAGE_SIZE) + bio_chained = false; +add_block_to_new_bio: + sz = bio_add_page(bio, page, bsize, off); + BUG_ON(sz != bsize); +block_added: + off += bsize; + if (off == PAGE_SIZE) page = NULL; - -page_added: - if (blocks_submitted < blocks_read + readhead_blocks) { + if (blocks_submitted < blocks_read + readahead_blocks) { /* Keep at least one bio in flight */ continue; } diff --git a/fs/inode.c index 96d62d97694e..c5267a4db0f5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -137,6 +137,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; diff --git a/fs/io-wq.c index 5147d2213b01..1f46fe663b28 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/kthread.h> #include <linux/rculist_nulls.h> +#include <linux/fs_struct.h> #include "io-wq.h" @@ -58,6 +59,7 @@ struct io_worker { struct mm_struct *mm; const struct cred *creds; struct files_struct *restore_files; + struct fs_struct *restore_fs; }; #if BITS_PER_LONG == 64 @@ -150,6 +152,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) task_unlock(current); } + if (current->fs != worker->restore_fs) + current->fs = worker->restore_fs; + /* * If we have an active mm, we need to drop the wq lock before unusing * it. If we do, return true and let the caller retry the idle loop. 
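The io-wq.c hunks around this point follow a save/override/restore discipline for current->fs: io_worker_start() records the fs_struct the worker thread started with, io_worker_handle_work() switches to the one a work item carries, and __io_worker_unuse() puts the original back before the worker returns to the pool. The userspace sketch below illustrates the same pattern; every name in it (fs_state, work_item, worker_run) is hypothetical and only stands in for the kernel structures.

#include <stdio.h>

/* Hypothetical stand-ins for struct fs_struct, current->fs and
 * struct io_wq_work; none of these names exist in the kernel. */
struct fs_state { const char *root; };

static struct fs_state default_fs = { "/" };
static struct fs_state *current_fs = &default_fs;    /* "current->fs" */

struct work_item {
        struct fs_state *fs;    /* fs the submitting task wants active */
        void (*func)(void);
};

static void report(void)
{
        printf("running with root %s\n", current_fs->root);
}

/* Mirror of the worker loop: save on entry, override per work item,
 * restore before the worker goes back to servicing other contexts. */
static void worker_run(struct work_item *work)
{
        struct fs_state *restore_fs = current_fs;    /* io_worker_start() */

        if (work->fs && current_fs != work->fs)      /* handle_work() */
                current_fs = work->fs;
        work->func();
        if (current_fs != restore_fs)                /* __io_worker_unuse() */
                current_fs = restore_fs;
}

int main(void)
{
        struct fs_state jail = { "/srv/jail" };
        struct work_item w1 = { &jail, report };
        struct work_item w2 = { NULL, report };

        worker_run(&w1);    /* root is /srv/jail while w1 runs */
        worker_run(&w2);    /* back to /; leaked state would show here */
        return 0;
}

Without the restore step, w2 would observe w1's root, which is exactly the cross-request leak the restore_fs field prevents.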
@@ -310,6 +315,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker) worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); worker->restore_files = current->files; + worker->restore_fs = current->fs; io_wqe_inc_running(wqe, worker); } @@ -456,6 +462,8 @@ static void io_worker_handle_work(struct io_worker *worker) } if (!worker->creds) worker->creds = override_creds(wq->creds); + if (work->fs && current->fs != work->fs) + current->fs = work->fs; if (test_bit(IO_WQ_BIT_CANCEL, &wq->state)) work->flags |= IO_WQ_WORK_CANCEL; if (worker->mm) @@ -658,11 +666,16 @@ static int io_wq_manager(void *data) /* create fixed workers */ refcount_set(&wq->refs, workers_to_create); for_each_node(node) { + if (!node_online(node)) + continue; if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND)) goto err; workers_to_create--; } + while (workers_to_create--) + refcount_dec(&wq->refs); + complete(&wq->done); while (!kthread_should_stop()) { @@ -670,6 +683,9 @@ static int io_wq_manager(void *data) struct io_wqe *wqe = wq->wqes[node]; bool fork_worker[2] = { false, false }; + if (!node_online(node)) + continue; + spin_lock_irq(&wqe->lock); if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND)) fork_worker[IO_WQ_ACCT_BOUND] = true; @@ -717,6 +733,17 @@ static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct, return true; } +static void io_run_cancel(struct io_wq_work *work) +{ + do { + struct io_wq_work *old_work = work; + + work->flags |= IO_WQ_WORK_CANCEL; + work->func(&work); + work = (work == old_work) ? NULL : work; + } while (work); +} + static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) { struct io_wqe_acct *acct = io_work_get_acct(wqe, work); @@ -729,8 +756,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) * It's close enough to not be an issue, fork() has the same delay. 
*/ if (unlikely(!io_wq_can_queue(wqe, acct, work))) { - work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + io_run_cancel(work); return; } @@ -785,7 +811,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe, list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { if (io_worker_get(worker)) { - ret = func(worker, data); + /* no task if node is/was offline */ + if (worker->task) + ret = func(worker, data); io_worker_release(worker); if (ret) break; @@ -864,8 +892,7 @@ static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe, spin_unlock_irqrestore(&wqe->lock, flags); if (found) { - work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + io_run_cancel(work); return IO_WQ_CANCEL_OK; } @@ -939,8 +966,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, spin_unlock_irqrestore(&wqe->lock, flags); if (found) { - work->flags |= IO_WQ_WORK_CANCEL; - work->func(&work); + io_run_cancel(work); return IO_WQ_CANCEL_OK; } @@ -998,6 +1024,8 @@ void io_wq_flush(struct io_wq *wq) for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; + if (!node_online(node)) + continue; init_completion(&data.done); INIT_IO_WORK(&data.work, io_wq_flush_func); data.work.flags |= IO_WQ_WORK_INTERNAL; @@ -1030,12 +1058,15 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) for_each_node(node) { struct io_wqe *wqe; + int alloc_node = node; - wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node); + if (!node_online(alloc_node)) + alloc_node = NUMA_NO_NODE; + wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); if (!wqe) goto err; wq->wqes[node] = wqe; - wqe->node = node; + wqe->node = alloc_node; wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0); if (wq->user) { @@ -1043,7 +1074,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) task_rlimit(current, RLIMIT_NPROC); } atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0); - wqe->node = node; wqe->wq = wq; spin_lock_init(&wqe->lock); INIT_WQ_LIST(&wqe->work_list); diff --git a/fs/io-wq.h b/fs/io-wq.h index 3f5e356de980..bbab98d1d328 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -72,6 +72,7 @@ struct io_wq_work { }; void (*func)(struct io_wq_work **); struct files_struct *files; + struct fs_struct *fs; unsigned flags; }; @@ -79,8 +80,9 @@ struct io_wq_work { do { \ (work)->list.next = NULL; \ (work)->func = _func; \ - (work)->flags = 0; \ (work)->files = NULL; \ + (work)->fs = NULL; \ + (work)->flags = 0; \ } while (0) \ typedef void (get_work_fn)(struct io_wq_work *); diff --git a/fs/io_uring.c b/fs/io_uring.c index e54556b0fcc6..faa0198c99ff 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -188,6 +188,7 @@ struct io_ring_ctx { bool account_mem; bool cq_overflow_flushed; bool drain_next; + bool eventfd_async; /* * Ring buffer of indices into array of io_uring_sqe, which is @@ -687,6 +688,7 @@ static void io_kill_timeout(struct io_kiocb *req) if (ret != -1) { atomic_inc(&req->ctx->cq_timeouts); list_del_init(&req->list); + req->flags |= REQ_F_COMP_LOCKED; io_cqring_fill_event(req, 0); io_put_req(req); } @@ -735,16 +737,30 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) return &rings->cqes[tail & ctx->cq_mask]; } -static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) +{ + if (!ctx->cq_ev_fd) + return false; + if (!ctx->eventfd_async) + return true; + return io_wq_current_is_worker() || in_interrupt(); +} + +static void 
__io_cqring_ev_posted(struct io_ring_ctx *ctx, bool trigger_ev) { if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (waitqueue_active(&ctx->sqo_wait)) wake_up(&ctx->sqo_wait); - if (ctx->cq_ev_fd) + if (trigger_ev) eventfd_signal(ctx->cq_ev_fd, 1); } +static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +{ + __io_cqring_ev_posted(ctx, io_should_trigger_evfd(ctx)); +} + /* Returns true if there are no backlogged entries after the flush */ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) { @@ -1084,10 +1100,10 @@ static void io_free_req(struct io_kiocb *req) __attribute__((nonnull)) static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr) { - io_req_find_next(req, nxtptr); - - if (refcount_dec_and_test(&req->refs)) + if (refcount_dec_and_test(&req->refs)) { + io_req_find_next(req, nxtptr); __io_free_req(req); + } } static void io_put_req(struct io_kiocb *req) @@ -1271,11 +1287,17 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) +static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, + long min) { int iters = 0, ret = 0; + /* + * We disallow the app entering submit/complete with polling, but we + * still need to lock the ring to prevent racing with polled issue + * that got punted to a workqueue. + */ + mutex_lock(&ctx->uring_lock); do { int tmin = 0; @@ -1311,21 +1333,6 @@ static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, ret = 0; } while (min && !*nr_events && !need_resched()); - return ret; -} - -static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events, - long min) -{ - int ret; - - /* - * We disallow the app entering submit/complete with polling, but we - * still need to lock the ring to prevent racing with polled issue - * that got punted to a workqueue. 
- */ - mutex_lock(&ctx->uring_lock); - ret = __io_iopoll_check(ctx, nr_events, min); mutex_unlock(&ctx->uring_lock); return ret; } @@ -1429,6 +1436,10 @@ static void io_iopoll_req_issued(struct io_kiocb *req) list_add(&req->list, &ctx->poll_list); else list_add_tail(&req->list, &ctx->poll_list); + + if ((ctx->flags & IORING_SETUP_SQPOLL) && + wq_has_sleeper(&ctx->sqo_wait)) + wake_up(&ctx->sqo_wait); } static void io_file_put(struct io_submit_state *state) @@ -1771,17 +1782,6 @@ static int io_alloc_async_ctx(struct io_kiocb *req) return req->io == NULL; } -static void io_rw_async(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - struct iovec *iov = NULL; - - if (req->io->rw.iov != req->io->rw.fast_iov) - iov = req->io->rw.iov; - io_wq_submit_work(workptr); - kfree(iov); -} - static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, struct iovec *iovec, struct iovec *fast_iov, struct iov_iter *iter) @@ -1789,11 +1789,12 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, if (req->opcode == IORING_OP_READ_FIXED || req->opcode == IORING_OP_WRITE_FIXED) return 0; - if (!req->io && io_alloc_async_ctx(req)) - return -ENOMEM; + if (!req->io) { + if (io_alloc_async_ctx(req)) + return -ENOMEM; - io_req_map_rw(req, io_size, iovec, fast_iov, iter); - req->work.func = io_rw_async; + io_req_map_rw(req, io_size, iovec, fast_iov, iter); + } return 0; } @@ -1880,8 +1881,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt, } } out_free: - if (!io_wq_current_is_worker()) - kfree(iovec); + kfree(iovec); return ret; } @@ -1974,6 +1974,12 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, ret2 = call_write_iter(req->file, kiocb, &iter); else ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter); + /* + * Raw bdev writes will -EOPNOTSUPP for IOCB_NOWAIT. Just + * retry them without IOCB_NOWAIT. 
+ */ + if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) + ret2 = -EAGAIN; if (!force_nonblock || ret2 != -EAGAIN) { kiocb_done(kiocb, ret2, nxt, req->in_async); } else { @@ -1986,8 +1992,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, } } out_free: - if (!io_wq_current_is_worker()) - kfree(iovec); + kfree(iovec); return ret; } @@ -2157,19 +2162,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt, return 0; } -#if defined(CONFIG_NET) -static void io_sendrecv_async(struct io_wq_work **workptr) -{ - struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); - struct iovec *iov = NULL; - - if (req->io->rw.iov != req->io->rw.fast_iov) - iov = req->io->msg.iov; - io_wq_submit_work(workptr); - kfree(iov); -} -#endif - static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { #if defined(CONFIG_NET) @@ -2179,6 +2171,11 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + if (!io) return 0; @@ -2237,17 +2234,19 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt, if (force_nonblock && ret == -EAGAIN) { if (req->io) return -EAGAIN; - if (io_alloc_async_ctx(req)) + if (io_alloc_async_ctx(req)) { + if (kmsg && kmsg->iov != kmsg->fast_iov) + kfree(kmsg->iov); return -ENOMEM; + } memcpy(&req->io->msg, &io.msg, sizeof(io.msg)); - req->work.func = io_sendrecv_async; return -EAGAIN; } if (ret == -ERESTARTSYS) ret = -EINTR; } - if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov) + if (kmsg && kmsg->iov != kmsg->fast_iov) kfree(kmsg->iov); io_cqring_add_event(req, ret); if (ret < 0) @@ -2269,6 +2268,11 @@ static int io_recvmsg_prep(struct io_kiocb *req, sr->msg_flags = READ_ONCE(sqe->msg_flags); sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr)); +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + if (!io) return 0; @@ -2329,17 +2333,19 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt, if (force_nonblock && ret == -EAGAIN) { if (req->io) return -EAGAIN; - if (io_alloc_async_ctx(req)) + if (io_alloc_async_ctx(req)) { + if (kmsg && kmsg->iov != kmsg->fast_iov) + kfree(kmsg->iov); return -ENOMEM; + } memcpy(&req->io->msg, &io.msg, sizeof(io.msg)); - req->work.func = io_sendrecv_async; return -EAGAIN; } if (ret == -ERESTARTSYS) ret = -EINTR; } - if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov) + if (kmsg && kmsg->iov != kmsg->fast_iov) kfree(kmsg->iov); io_cqring_add_event(req, ret); if (ret < 0) @@ -2635,6 +2641,14 @@ static void io_poll_complete_work(struct io_wq_work **workptr) io_wq_assign_next(workptr, nxt); } +static void io_poll_trigger_evfd(struct io_wq_work **workptr) +{ + struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work); + + eventfd_signal(req->ctx->cq_ev_fd, 1); + io_put_req(req); +} + static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, void *key) { @@ -2657,13 +2671,21 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, * for finalizing the request, mark us as having grabbed that already. 
*/ if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) { + bool trigger_ev; + hash_del(&req->hash_node); io_poll_complete(req, mask, 0); - req->flags |= REQ_F_COMP_LOCKED; - io_put_req(req); + trigger_ev = io_should_trigger_evfd(ctx); + if (trigger_ev && eventfd_signal_count()) { + trigger_ev = false; + req->work.func = io_poll_trigger_evfd; + } else { + req->flags |= REQ_F_COMP_LOCKED; + io_put_req(req); + req = NULL; + } spin_unlock_irqrestore(&ctx->completion_lock, flags); - - io_cqring_ev_posted(ctx); + __io_cqring_ev_posted(ctx, trigger_ev); } else { io_queue_async_work(req); } @@ -3076,6 +3098,9 @@ static int io_req_defer_prep(struct io_kiocb *req, { ssize_t ret = 0; + if (!sqe) + return 0; + switch (req->opcode) { case IORING_OP_NOP: break; @@ -3548,7 +3573,7 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe) err: /* drop submission reference */ - io_put_req(req); + io_put_req_find_next(req, &nxt); if (linked_timeout) { if (!ret) @@ -3659,6 +3684,11 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, req->flags |= REQ_F_HARDLINK; INIT_LIST_HEAD(&req->link_list); + + if (io_alloc_async_ctx(req)) { + ret = -EAGAIN; + goto err_req; + } ret = io_req_defer_prep(req, sqe); if (ret) req->flags |= REQ_F_FAIL_LINK; @@ -3840,9 +3870,8 @@ static int io_sq_thread(void *data) const struct cred *old_cred; mm_segment_t old_fs; DEFINE_WAIT(wait); - unsigned inflight; unsigned long timeout; - int ret; + int ret = 0; complete(&ctx->completions[1]); @@ -3850,39 +3879,19 @@ static int io_sq_thread(void *data) set_fs(USER_DS); old_cred = override_creds(ctx->creds); - ret = timeout = inflight = 0; + timeout = jiffies + ctx->sq_thread_idle; while (!kthread_should_park()) { unsigned int to_submit; - if (inflight) { + if (!list_empty(&ctx->poll_list)) { unsigned nr_events = 0; - if (ctx->flags & IORING_SETUP_IOPOLL) { - /* - * inflight is the count of the maximum possible - * entries we submitted, but it can be smaller - * if we dropped some of them. If we don't have - * poll entries available, then we know that we - * have nothing left to poll for. Reset the - * inflight count to zero in that case. - */ - mutex_lock(&ctx->uring_lock); - if (!list_empty(&ctx->poll_list)) - __io_iopoll_check(ctx, &nr_events, 0); - else - inflight = 0; - mutex_unlock(&ctx->uring_lock); - } else { - /* - * Normal IO, just pretend everything completed. - * We don't have to poll completions for that. - */ - nr_events = inflight; - } - - inflight -= nr_events; - if (!inflight) + mutex_lock(&ctx->uring_lock); + if (!list_empty(&ctx->poll_list)) + io_iopoll_getevents(ctx, &nr_events, 0); + else timeout = jiffies + ctx->sq_thread_idle; + mutex_unlock(&ctx->uring_lock); } to_submit = io_sqring_entries(ctx); @@ -3892,19 +3901,6 @@ static int io_sq_thread(void *data) * to enter the kernel to reap and flush events. */ if (!to_submit || ret == -EBUSY) { - /* - * We're polling. If we're within the defined idle - * period, then let us spin without work before going - * to sleep. The exception is if we got EBUSY doing - * more IO, we should wait for the application to - * reap events and wake us up. - */ - if (inflight || - (!time_after(jiffies, timeout) && ret != -EBUSY)) { - cond_resched(); - continue; - } - /* * Drop cur_mm before scheduling, we can't hold it for * long periods (or over schedule()). Do this before @@ -3917,9 +3913,36 @@ static int io_sq_thread(void *data) cur_mm = NULL; } + /* + * We're polling. 
If we're within the defined idle + * period, then let us spin without work before going + * to sleep. The exception is if we got EBUSY doing + * more IO, we should wait for the application to + * reap events and wake us up. + */ + if (!list_empty(&ctx->poll_list) || + (!time_after(jiffies, timeout) && ret != -EBUSY && + !percpu_ref_is_dying(&ctx->refs))) { + cond_resched(); + continue; + } + prepare_to_wait(&ctx->sqo_wait, &wait, TASK_INTERRUPTIBLE); + /* + * While doing polled IO, before going to sleep, we need + * to check if there are new reqs added to poll_list, it + * is because reqs may have been punted to io worker and + * will be added to poll_list later, hence check the + * poll_list again. + */ + if ((ctx->flags & IORING_SETUP_IOPOLL) && + !list_empty_careful(&ctx->poll_list)) { + finish_wait(&ctx->sqo_wait, &wait); + continue; + } + /* Tell userspace we may need a wakeup call */ ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; /* make sure to read SQ tail after writing flags */ @@ -3948,8 +3971,7 @@ static int io_sq_thread(void *data) mutex_lock(&ctx->uring_lock); ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true); mutex_unlock(&ctx->uring_lock); - if (ret > 0) - inflight += ret; + timeout = jiffies + ctx->sq_thread_idle; } set_fs(old_fs); @@ -4962,7 +4984,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) if (READ_ONCE(ctx->rings->sq.tail) - ctx->cached_sq_head != ctx->rings->sq_ring_entries) mask |= EPOLLOUT | EPOLLWRNORM; - if (READ_ONCE(ctx->rings->cq.head) != ctx->cached_cq_tail) + if (io_cqring_events(ctx, false)) mask |= EPOLLIN | EPOLLRDNORM; return mask; @@ -4981,6 +5003,16 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) percpu_ref_kill(&ctx->refs); mutex_unlock(&ctx->uring_lock); + /* + * Wait for sq thread to idle, if we have one. It won't spin on new + * work after we've killed the ctx ref above. This is important to do + * before we cancel existing commands, as the thread could otherwise + * be queueing new work post that. If that's work we need to cancel, + * it could cause shutdown to hang. 
+ */ + while (ctx->sqo_thread && !wq_has_sleeper(&ctx->sqo_wait)) + cpu_relax(); + io_kill_timeouts(ctx); io_poll_remove_all(ctx); @@ -5473,10 +5505,17 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ret = io_sqe_files_update(ctx, arg, nr_args); break; case IORING_REGISTER_EVENTFD: + case IORING_REGISTER_EVENTFD_ASYNC: ret = -EINVAL; if (nr_args != 1) break; ret = io_eventfd_register(ctx, arg); + if (ret) + break; + if (opcode == IORING_REGISTER_EVENTFD_ASYNC) + ctx->eventfd_async = 1; + else + ctx->eventfd_async = 0; break; case IORING_UNREGISTER_EVENTFD: ret = -EINVAL; diff --git a/fs/jbd2/checkpoint.c index 8fff6677a5da..96bf33986d03 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -164,7 +164,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); - jbd2_journal_abort(journal, 0); + jbd2_journal_abort(journal, -EIO); } write_lock(&journal->j_state_lock); } else { diff --git a/fs/jbd2/commit.c index 7f0b362b3842..27373f5792a4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -782,7 +782,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } blk_finish_plug(&plug); @@ -875,7 +875,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } if (cbh) err = journal_wait_on_commit_record(journal, cbh); @@ -976,29 +976,33 @@ void jbd2_journal_commit_transaction(journal_t *journal) * it. */ /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { + * A buffer which has been freed while still being journaled + * by a previous transaction, refile the buffer to BJ_Forget of + * the running transaction. If the just committed transaction + * contains "add to orphan" operation, we can completely + * invalidate the buffer now. We are rather thorough in that + * since the buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial page. + */ + if (buffer_freed(bh) && !jh->b_next_transaction) { + struct address_space *mapping; + + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. We are rather through in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. + * Block device buffers need to stay mapped all the + * time, so it is enough to clear buffer_jbddirty and + * buffer_freed bits. For the file mapping buffers (i.e. + * journalled data) we need to unmap buffer and clear + * more bits. 
We also need to be careful about the check + * because the data page mapping can get cleared under + * our hands; in that case we need not clear more bits + * because the page and buffers will be freed and can + * never be reused once we are done with them. */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + mapping = READ_ONCE(bh->b_page->mapping); + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_req(bh); diff --git a/fs/jbd2/journal.c index 5e408ee24a1a..0b4280fcad91 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -982,6 +982,7 @@ static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) { + (*pos)++; return NULL; } @@ -1710,6 +1711,11 @@ int jbd2_journal_load(journal_t *journal) journal->j_devname); return -EFSCORRUPTED; } + /* + * clear JBD2_ABORT flag initialized in journal_init_common + * here to update log tail information with the newest seq. + */ + journal->j_flags &= ~JBD2_ABORT; /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory @@ -1717,7 +1723,6 @@ int jbd2_journal_load(journal_t *journal) if (journal_reset(journal)) goto recovery_error; - journal->j_flags &= ~JBD2_ABORT; journal->j_flags |= JBD2_LOADED; return 0; @@ -2142,8 +2147,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) if (journal->j_flags & JBD2_ABORT) { write_unlock(&journal->j_state_lock); - if (!old_errno && old_errno != -ESHUTDOWN && - errno == -ESHUTDOWN) + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) jbd2_journal_update_sb_errno(journal); return; } @@ -2151,12 +2155,10 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) { - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); - } + jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); } /** @@ -2198,11 +2200,6 @@ static void __journal_abort_soft (journal_t *journal, int errno) * failure to disk. ext3_error, for example, now uses this * functionality. * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). 
- * */ void jbd2_journal_abort(journal_t *journal, int errno) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 27b9f9dee434..295e3cdb4461 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -936,8 +936,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, char *frozen_buffer = NULL; unsigned long start_lock, time_lock; - if (is_handle_aborted(handle)) - return -EROFS; journal = transaction->t_journal; jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); @@ -1152,8 +1150,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -1189,6 +1187,9 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int rc; + if (is_handle_aborted(handle)) + return -EROFS; + if (jbd2_write_access_granted(handle, bh, false)) return 0; @@ -1326,6 +1327,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; char *committed_data = NULL; + if (is_handle_aborted(handle)) + return -EROFS; + if (jbd2_write_access_granted(handle, bh, true)) return 0; @@ -2329,14 +2333,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, return -EBUSY; } /* - * OK, buffer won't be reachable after truncate. We just set - * j_next_transaction to the running transaction (if there is - * one) and mark buffer as freed so that commit code knows it - * should clear dirty bits when it is done with the buffer. + * OK, buffer won't be reachable after truncate. We just clear + * b_modified to not confuse transaction credit accounting, and + * set j_next_transaction to the running transaction (if there + * is one) and mark buffer as freed so that commit code knows + * it should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) jh->b_next_transaction = journal->j_running_transaction; + jh->b_modified = 0; spin_unlock(&journal->j_list_lock); spin_unlock(&jh->b_state_lock); write_unlock(&journal->j_state_lock); @@ -2563,8 +2569,8 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) diff --git a/fs/locks.c b/fs/locks.c index 44b6da032842..b8a31c1c4fff 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -725,7 +725,6 @@ static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); list_del_init(&waiter->fl_blocked_member); - waiter->fl_blocker = NULL; } static void __locks_wake_up_blocks(struct file_lock *blocker) @@ -740,6 +739,13 @@ static void __locks_wake_up_blocks(struct file_lock *blocker) waiter->fl_lmops->lm_notify(waiter); else wake_up(&waiter->fl_wait); + + /* + * The setting of fl_blocker to NULL marks the "done" + * point in deleting a block. Paired with acquire at the top + * of locks_delete_block(). 
+ */ + smp_store_release(&waiter->fl_blocker, NULL); } } @@ -754,24 +760,41 @@ int locks_delete_block(struct file_lock *waiter) int status = -ENOENT; /* - * If fl_blocker is NULL, it won't be set again as this thread - * "owns" the lock and is the only one that might try to claim - * the lock. So it is safe to test fl_blocker locklessly. - * Also if fl_blocker is NULL, this waiter is not listed on - * fl_blocked_requests for some lock, so no other request can - * be added to the list of fl_blocked_requests for this - * request. So if fl_blocker is NULL, it is safe to - * locklessly check if fl_blocked_requests is empty. If both - * of these checks succeed, there is no need to take the lock. + * If fl_blocker is NULL, it won't be set again as this thread "owns" + * the lock and is the only one that might try to claim the lock. + * + * We use acquire/release to manage fl_blocker so that we can + * optimize away taking the blocked_lock_lock in many cases. + * + * The smp_load_acquire guarantees two things: + * + * 1/ that fl_blocked_requests can be tested locklessly. If something + * was recently added to that list it must have been in a locked region + * *before* the locked region when fl_blocker was set to NULL. + * + * 2/ that no other thread is accessing 'waiter', so it is safe to free + * it. __locks_wake_up_blocks is careful not to touch waiter after + * fl_blocker is released. + * + * If a lockless check of fl_blocker shows it to be NULL, we know that + * no new locks can be inserted into its fl_blocked_requests list, and + * can avoid doing anything further if the list is empty. */ - if (waiter->fl_blocker == NULL && + if (!smp_load_acquire(&waiter->fl_blocker) && list_empty(&waiter->fl_blocked_requests)) return status; + spin_lock(&blocked_lock_lock); if (waiter->fl_blocker) status = 0; __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); + + /* + * The setting of fl_blocker to NULL marks the "done" point in deleting + * a block. Paired with acquire at the top of this function. 
+ */ + smp_store_release(&waiter->fl_blocker, NULL); spin_unlock(&blocked_lock_lock); return status; } @@ -1364,7 +1387,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } @@ -1449,7 +1473,8 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); + error = wait_event_interruptible(fl.fl_wait, + list_empty(&fl.fl_blocked_member)); if (!error) { /* * If we've been sleeping someone might have @@ -1652,7 +1677,8 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, - !new_fl->fl_blocker, break_time); + list_empty(&new_fl->fl_blocked_member), + break_time); percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -2136,7 +2162,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } @@ -2413,7 +2440,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } diff --git a/fs/namei.c b/fs/namei.c index 4fb61e0754ed..70eb4bfeaebc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1367,7 +1367,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->path.dentry = parent; nd->seq = seq; if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + return -ECHILD; break; } else { struct mount *mnt = real_mount(nd->path.mnt); @@ -3202,8 +3202,8 @@ static int do_last(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; - kuid_t dir_uid = dir->d_inode->i_uid; - umode_t dir_mode = dir->d_inode->i_mode; + kuid_t dir_uid = nd->inode->i_uid; + umode_t dir_mode = nd->inode->i_mode; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool got_write = false; diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 295a7a21b774..e7dd07f47825 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -90,7 +90,7 @@ config NFS_V4 config NFS_SWAP bool "Provide swap over NFS support" default n - depends on NFS_FS + depends on NFS_FS && SWAP select SUNRPC_SWAP help This option enables swapon to work on files located on NFS mounts. 
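The fs/locks.c hunks above replace the bare fl_blocker test with an smp_load_acquire()/smp_store_release() pair so the lockless fast path in locks_delete_block() stays safe: the waker empties fl_blocked_requests first and only then publishes "done" by releasing fl_blocker to NULL. Below is a compact C11 sketch of that handoff, with stdatomic standing in for the kernel barrier primitives; struct waiter and its fields merely echo the kernel names and are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative miniature of the fl_blocker handoff; not kernel code. */
struct waiter {
        _Atomic(void *) blocker;  /* plays the role of fl_blocker */
        int blocked_requests;     /* stands in for fl_blocked_requests */
};

/* Waker side: empty the request list first, then publish "done" with a
 * release store, mirroring __locks_wake_up_blocks() and its
 * smp_store_release(&waiter->fl_blocker, NULL). */
static void wake_waiter(struct waiter *w)
{
        w->blocked_requests = 0;
        atomic_store_explicit(&w->blocker, NULL, memory_order_release);
}

/* Waiter side: the acquire load pairs with the release store, so once we
 * read NULL we are guaranteed to also observe the emptied list and can
 * skip taking the heavy lock, as locks_delete_block() now does. */
static bool waiter_done(struct waiter *w)
{
        if (atomic_load_explicit(&w->blocker, memory_order_acquire) != NULL)
                return false;
        return w->blocked_requests == 0;
}

int main(void)
{
        static int token;
        struct waiter w = { .blocked_requests = 1 };

        atomic_store_explicit(&w.blocker, &token, memory_order_relaxed);
        printf("before wake: done=%d\n", waiter_done(&w));  /* 0 */
        wake_waiter(&w);
        printf("after wake:  done=%d\n", waiter_done(&w));  /* 1 */
        return 0;
}

The same ordering argument explains the switch from waiting on !fl_blocker to waiting on list_empty(&fl->fl_blocked_member): the list removal is what the release store fences, so it is the condition that may be checked without the lock.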
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index fe57b2b5314a..8e322bacde69 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -25,13 +25,29 @@ #include "internal.h" #include "nfs4trace.h" -static void nfs_free_delegation(struct nfs_delegation *delegation) +static atomic_long_t nfs_active_delegations; + +static void __nfs_free_delegation(struct nfs_delegation *delegation) { put_cred(delegation->cred); delegation->cred = NULL; kfree_rcu(delegation, rcu); } +static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation) +{ + if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; + atomic_long_dec(&nfs_active_delegations); + } +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + nfs_mark_delegation_revoked(delegation); + __nfs_free_delegation(delegation); +} + /** * nfs_mark_delegation_referenced - set delegation's REFERENCED flag * @delegation: delegation to process @@ -222,13 +238,18 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) { + const struct cred *cred; int res = 0; - if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) - res = nfs4_proc_delegreturn(inode, - delegation->cred, + if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + spin_lock(&delegation->lock); + cred = get_cred(delegation->cred); + spin_unlock(&delegation->lock); + res = nfs4_proc_delegreturn(inode, cred, &delegation->stateid, issync); + put_cred(cred); + } return res; } @@ -343,7 +364,8 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, delegation->stateid.seqid = update->stateid.seqid; smp_wmb(); delegation->type = update->type; - clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); } } @@ -423,6 +445,8 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; + atomic_long_inc(&nfs_active_delegations); + trace_nfs4_set_delegation(inode, type); spin_lock(&inode->i_lock); @@ -432,7 +456,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, out: spin_unlock(&clp->cl_lock); if (delegation != NULL) - nfs_free_delegation(delegation); + __nfs_free_delegation(delegation); if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); nfs_free_delegation(freeme); @@ -760,13 +784,6 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } -static void nfs_mark_delegation_revoked(struct nfs_server *server, - struct nfs_delegation *delegation) -{ - set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; -} - static void nfs_revoke_delegation(struct inode *inode, const nfs4_stateid *stateid) { @@ -794,7 +811,7 @@ static void nfs_revoke_delegation(struct inode *inode, } spin_unlock(&delegation->lock); } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); ret = true; out: rcu_read_unlock(); @@ -833,7 +850,7 @@ void nfs_delegation_mark_returned(struct inode *inode, delegation->stateid.seqid = stateid->seqid; } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); out_clear_returning: 
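
[Editor's note on the delegation.c hunks above: the new nfs_active_delegations counter is adjusted only when test_and_set_bit()/test_and_clear_bit() report a genuine transition of NFS_DELEGATION_REVOKED, so revoking or reinstating the same delegation twice moves the count exactly once. A minimal sketch of that count-per-transition idiom, with illustrative names throughout, follows.]

/* Sketch: pair an atomic counter with a flag bit so each logical
 * transition is counted exactly once. OBJ_REVOKED and both helpers
 * are illustrative, not kernel API. */
#define OBJ_REVOKED	0	/* flag bit number, made up */

static atomic_long_t active_objects;

static void obj_mark_revoked(unsigned long *flags)
{
	if (!test_and_set_bit(OBJ_REVOKED, flags))
		atomic_long_dec(&active_objects);	/* first revoke only */
}

static void obj_mark_valid(unsigned long *flags)
{
	if (test_and_clear_bit(OBJ_REVOKED, flags))
		atomic_long_inc(&active_objects);	/* undo exactly once */
}
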
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e180033e35cf..05ed7be8a634 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -162,6 +162,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -610,6 +622,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -626,8 +640,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -682,6 +694,7 @@ int nfs_readdir_filler(void *data, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } @@ -689,8 +702,6 @@ int nfs_readdir_filler(void *data, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -704,19 +715,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. */ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) + return 0; + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -731,7 +751,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -770,7 +790,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = true; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -816,13 +835,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. 
A @@ -887,6 +906,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 040a50fd9bf3..29f00da8a0b7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -245,10 +245,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index); /* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1; - return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); } /** diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 927eb680f161..69971f6c840d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2334,6 +2334,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error; @@ -2346,7 +2347,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1fe83e0f663e..e2ae54b35dfe 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context, lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.falloc_fattr = nfs_alloc_fattr(); if (!res.falloc_fattr) @@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, } else { status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } } status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, pos_src, pos_src + (loff_t)count - 1); @@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_sync_inode(dst_inode); if (status) @@ -334,14 +343,14 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = handle_async_copy(res, dst_server, src_server, src, dst, &args->src_stateid, restart); if (status) - return status; + goto out; } if ((!res->synchronous || !args->sync) && res->write_res.verifier.committed != NFS_FILE_SYNC) { status = process_copy_commit(dst, pos_dst, res); if (status) - return status; + goto out; } truncate_pagecache_range(dst_inode, pos_dst, @@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, FMODE_READ); nfs_put_lock_context(l_ctx); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs4_call_sync(src_server->client, src_server, &msg, &args->cna_seq_args, &res->cnr_seq_res, 0); @@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep, 
status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context, lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; - + } status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.dst_fattr = nfs_alloc_fattr(); if (!res.dst_fattr) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a7a73b1d1fec..a5db055e2a9b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -446,9 +446,7 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); -extern void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed); +extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease); /* nfs4state.c */ diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 620de905cba9..3f892035c141 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -86,7 +86,6 @@ nfs4_file_open(struct inode *inode, struct file *filp) if (inode != d_inode(dentry)) goto out_drop; - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); nfs_fscache_open_file(inode, filp); err = 0; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 76d37161409a..13c2de527718 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2962,10 +2962,13 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, struct dentry *dentry; struct nfs4_state *state; fmode_t acc_mode = _nfs4_ctx_to_accessmode(ctx); + struct inode *dir = d_inode(opendata->dir); + unsigned long dir_verifier; unsigned int seq; int ret; seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); + dir_verifier = nfs_save_change_attribute(dir); ret = _nfs4_proc_open(opendata, ctx); if (ret != 0) @@ -2993,8 +2996,19 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dput(ctx->dentry); ctx->dentry = dentry = alias; } - nfs_set_verifier(dentry, - nfs_save_change_attribute(d_inode(opendata->dir))); + } + + switch(opendata->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + if (!opendata->rpc_done) + break; + if (opendata->o_res.delegation_type != 0) + dir_verifier = nfs_save_change_attribute(dir); + nfs_set_verifier(dentry, dir_verifier); } /* Parse layoutget results before we check for access */ @@ -3187,6 +3201,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; @@ -3239,6 +3258,8 @@ static int _nfs4_do_setattr(struct inode *inode, nfs_put_lock_context(l_ctx); if (status == -EIO) return -EBADF; + else if (status == -EAGAIN) + goto zero_stateid; } else { 
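
[Editor's note, stepping back from the nfs4proc.c context for a moment: the nfs42proc.c hunks above apply the identical fix-up at every nfs4_set_rw_stateid() call in the file (fallocate, copy, copy_notify, llseek, clone). That function can return -EAGAIN, and each caller now converts it to -NFS4ERR_BAD_STATEID so the generic NFSv4 error handling treats it as a recoverable stateid error instead of surfacing a raw -EAGAIN. Were the pattern factored out, a single wrapper would carry the translation once; the helper below is hypothetical, not part of the patch.]

/* Hypothetical wrapper for the repeated -EAGAIN translation in the
 * nfs42proc.c hunks above; not part of the patch. */
static int nfs42_select_rw_stateid(nfs4_stateid *dst,
				   const struct nfs_open_context *ctx,
				   const struct nfs_lock_context *l_ctx,
				   fmode_t fmode)
{
	int status = nfs4_set_rw_stateid(dst, ctx, l_ctx, fmode);

	if (status == -EAGAIN)
		status = -NFS4ERR_BAD_STATEID;
	return status;
}
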
zero_stateid: nfs4_stateid_copy(&arg->stateid, &zero_stateid); @@ -5019,16 +5040,13 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str struct nfs4_exception exception = { .interruptible = true, }; - unsigned long now = jiffies; int err; do { err = _nfs4_do_fsinfo(server, fhandle, fsinfo); trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); if (err == 0) { - nfs4_set_lease_period(server->nfs_client, - fsinfo->lease_time * HZ, - now); + nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ); break; } err = nfs4_handle_exception(server, err, &exception); @@ -5291,7 +5309,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); } @@ -6084,6 +6102,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .callback_data = &setclientid, .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; + unsigned long now = jiffies; int status; /* nfs_client_id4 */ @@ -6116,6 +6135,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } + + if (status == 0) + do_renew_lease(clp, now); out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); @@ -8203,6 +8225,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre struct rpc_task *task; struct nfs41_exchange_id_args *argp; struct nfs41_exchange_id_res *resp; + unsigned long now = jiffies; int status; task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL); @@ -8223,6 +8246,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; + do_renew_lease(clp, now); + clp->cl_clientid = resp->clientid; clp->cl_exchange_flags = resp->flags; clp->cl_seqid = resp->seqid; diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6ea431b067dd..ff876dda7f06 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -138,15 +138,12 @@ nfs4_kill_renewd(struct nfs_client *clp) * * @clp: pointer to nfs_client * @lease: new value for lease period - * @lastrenewed: time at which lease was last renewed */ void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed) + unsigned long lease) { spin_lock(&clp->cl_lock); clp->cl_lease_time = lease; - clp->cl_last_renewal = lastrenewed; spin_unlock(&clp->cl_lock); /* Cap maximum reconnect timeout at 1/2 lease period */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 34552329233d..f0b002734355 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -92,17 +92,15 @@ static int nfs4_setup_state_renewal(struct nfs_client *clp) { int status; struct nfs_fsinfo fsinfo; - unsigned long now; if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { nfs4_schedule_state_renewal(clp); return 0; } - now = jiffies; status = nfs4_proc_get_lease_time(clp, &fsinfo); if (status == 0) { - nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); + nfs4_set_lease_period(clp, fsinfo.lease_time * HZ); nfs4_schedule_state_renewal(clp); } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index e60b6fbd5ada..d405557cb43f 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -352,7 +352,7 @@ 
DECLARE_EVENT_CLASS(nfs4_clientid_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(dstaddr, clp->cl_hostname); ), @@ -432,7 +432,8 @@ TRACE_EVENT(nfs4_sequence_done, __entry->target_highest_slotid = res->sr_target_highest_slotid; __entry->status_flags = res->sr_status_flags; - __entry->error = res->sr_status; + __entry->error = res->sr_status < 0 ? + -res->sr_status : 0; ), TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " @@ -640,7 +641,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, ), TP_fast_assign( - __entry->error = status; + __entry->error = status < 0 ? -status : 0; __entry->state = clp->cl_state; __assign_str(hostname, clp->cl_hostname); __assign_str(section, section); @@ -659,7 +660,7 @@ TRACE_EVENT(nfs4_xdr_status, TP_PROTO( const struct xdr_stream *xdr, u32 op, - int error + u32 error ), TP_ARGS(xdr, op, error), @@ -849,7 +850,7 @@ TRACE_EVENT(nfs4_close, __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->fmode = (__force unsigned int)state->state; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid.seqid); __entry->stateid_hash = @@ -914,7 +915,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -986,7 +987,7 @@ TRACE_EVENT(nfs4_set_lock, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -1164,7 +1165,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, TP_fast_assign( __entry->dev = res->server->s_dev; __entry->fhandle = nfs_fhandle_hash(args->fhandle); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid->seqid); __entry->stateid_hash = @@ -1204,7 +1205,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); @@ -1306,7 +1307,7 @@ TRACE_EVENT(nfs4_lookupp, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = NFS_FILEID(inode); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1342,7 +1343,7 @@ TRACE_EVENT(nfs4_rename, __entry->dev = olddir->i_sb->s_dev; __entry->olddir = NFS_FILEID(olddir); __entry->newdir = NFS_FILEID(newdir); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(oldname, oldname->name); __assign_str(newname, newname->name); ), @@ -1433,7 +1434,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = @@ -1489,7 +1490,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, __entry->valid = fattr->valid; __entry->fhandle = nfs_fhandle_hash(fhandle); __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0; - __entry->error = error; + __entry->error = error < 0 ? 
-error : 0; ), TP_printk( @@ -1536,7 +1537,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1593,7 +1594,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1896,7 +1897,7 @@ TRACE_EVENT(nfs4_layoutget, __entry->iomode = args->iomode; __entry->offset = args->offset; __entry->count = args->length; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 936c57779ff4..dc6b9c2f36b2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4097,7 +4097,7 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str status = NFS_ATTR_FATTR_ATIME; bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS; } - dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec); + dprintk("%s: atime=%lld\n", __func__, time->tv_sec); return status; } @@ -4115,7 +4115,7 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s status = NFS_ATTR_FATTR_CTIME; bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA; } - dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec); + dprintk("%s: ctime=%lld\n", __func__, time->tv_sec); return status; } @@ -4132,8 +4132,8 @@ static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, status = decode_attr_time(xdr, time); bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA; } - dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec, - (long)time->tv_nsec); + dprintk("%s: time_delta=%lld %ld\n", __func__, time->tv_sec, + time->tv_nsec); return status; } @@ -4197,7 +4197,7 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str status = NFS_ATTR_FATTR_MTIME; bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY; } - dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec); + dprintk("%s: mtime=%lld\n", __func__, time->tv_sec); return status; } @@ -4313,11 +4313,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cec3070ab577..542ea8dfd1bc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1425,7 +1425,7 @@ bool pnfs_roc(struct inode *ino, /* lo ref dropped in pnfs_roc_release() */ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); /* If the creds don't match, we can't compound the layoutreturn */ - if (!layoutreturn || cred != lo->plh_lc_cred) + if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0) goto out_noroc; roc = layoutreturn; @@ -1998,8 +1998,6 @@ pnfs_update_layout(struct inode *ino, trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - if (status != -EAGAIN) - goto out_unlock; spin_unlock(&ino->i_lock); nfs4_schedule_stateid_recovery(server, ctx->state); 
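
[Editor's note: one recurring change in the nfs4trace.h hunks above is worth calling out. Every event now records `error < 0 ? -error : 0`, storing the positive errno magnitude and clamping non-errors to 0, which keeps the "error=%ld (%s)" trace output decodable. Written once as a helper for clarity; the name is illustrative and the patch deliberately open-codes the expression in each event.]

/* Illustrative helper for the normalization repeated in the trace
 * events above; the patch open-codes this expression instead. */
static inline unsigned long nfs4_trace_errno(long error)
{
	return error < 0 ? -error : 0;
}
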
pnfs_clear_first_layoutget(lo); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 82af4809b869..8b37e7f8e789 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -31,12 +31,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf; data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 52cab65f91cf..913eb37c249b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -243,7 +243,15 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct address_space *mapping) { + struct inode *inode = mapping->host; + nfs_zap_mapping(mapping->host, mapping); + /* Force file size revalidation */ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_SIZE; + spin_unlock(&inode->i_lock); } static void nfs_mapping_set_error(struct page *page, int error) @@ -1829,6 +1837,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; @@ -1856,7 +1865,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req); diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 32a9bf22ac08..0a3e5c2aac4b 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -789,6 +789,7 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, *new; struct inode *inode; unsigned int hashval; + bool retry = true; /* FIXME: skip this if fh_dentry is already set? */ status = fh_verify(rqstp, fhp, S_IFREG, @@ -824,6 +825,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, /* Did construction of this file fail? 
*/ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + if (!retry) { + status = nfserr_jukebox; + goto out; + } + retry = false; nfsd_file_put_noref(nf); goto retry; } diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 2681c70283ce..e12409eca7cc 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -675,7 +675,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 369e574c5092..e909ca36c255 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6550,7 +6550,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = get_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index d61b83b9654c..6bd915e4295e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -606,7 +606,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c0dc491537a6..82cf80dde5c7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -280,19 +280,25 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, * Commit metadata changes to stable storage. */ static int -commit_metadata(struct svc_fh *fhp) +commit_inode_metadata(struct inode *inode) { - struct inode *inode = d_inode(fhp->fh_dentry); const struct export_operations *export_ops = inode->i_sb->s_export_op; - if (!EX_ISSYNC(fhp->fh_export)) - return 0; - if (export_ops->commit_metadata) return export_ops->commit_metadata(inode); return sync_inode_metadata(inode, 1); } +static int +commit_metadata(struct svc_fh *fhp) +{ + struct inode *inode = d_inode(fhp->fh_dentry); + + if (!EX_ISSYNC(fhp->fh_export)) + return 0; + return commit_inode_metadata(inode); +} + /* * Go over the attributes and take care of the small differences between * NFS semantics and what Linux expects. @@ -537,6 +543,9 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, if (sync) { loff_t dst_end = count ? 
dst_pos + count - 1 : LLONG_MAX; int status = vfs_fsync_range(dst, dst_pos, dst_end, 0); + + if (!status) + status = commit_inode_metadata(file_inode(src)); if (status < 0) return nfserrno(status); } @@ -975,6 +984,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, host_err = vfs_iter_write(file, &iter, &pos, flags); if (host_err < 0) goto out_nfserr; + *cnt = host_err; nfsdstats.io_write += *cnt; fsnotify_modify(file); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 6c7388430ad3..d4359a1df3d5 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -2899,18 +2899,12 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr) ia_valid |= ATTR_MTIME | ATTR_CTIME; } } - if (ia_valid & ATTR_ATIME) { - vi->i_atime = timestamp_truncate(attr->ia_atime, - vi); - } - if (ia_valid & ATTR_MTIME) { - vi->i_mtime = timestamp_truncate(attr->ia_mtime, - vi); - } - if (ia_valid & ATTR_CTIME) { - vi->i_ctime = timestamp_truncate(attr->ia_ctime, - vi); - } + if (ia_valid & ATTR_ATIME) + vi->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + vi->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + vi->i_ctime = attr->ia_ctime; mark_inode_dirty(vi); out: return err; diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index 38b224372776..5e700b45d32d 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/.. - obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 4de89af96abf..6abaded3ff6b 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -23,15 +23,15 @@ #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" #define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res, struct dlm_lock *lock); diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 965f45dbe17b..6051edc33aef 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -23,9 +23,9 @@ #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" @@ -33,7 +33,7 @@ #include "dlmconvert.h" #define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" +#include "../cluster/masklog.h" /* NOTE: __dlmconvert_master is the only function in here that * needs a spinlock held on entry (res->spinlock) and it is the diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 4d0b452012b2..c5c6efba7b5e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -17,9 +17,9 @@ #include #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" @@ -27,7 +27,7 @@ #include "dlmdebug.h" #define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static int stringify_lockname(const char *lockname, int locklen, char *buf, int len); diff 
--git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index ee6f459f9770..357cfc702ce3 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -20,9 +20,9 @@ #include #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" @@ -30,7 +30,7 @@ #include "dlmdebug.h" #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN) -#include "cluster/masklog.h" +#include "../cluster/masklog.h" /* * ocfs2 node maps are array of long int, which limits to send them freely diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index baff087f3863..83f0760e4fba 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -25,9 +25,9 @@ #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" @@ -35,7 +35,7 @@ #include "dlmconvert.h" #define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static struct kmem_cache *dlm_lock_cache; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 74b768ca1cd8..c9d7037b6793 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -25,9 +25,9 @@ #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" @@ -35,7 +35,7 @@ #include "dlmdebug.h" #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_MASTER) -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static void dlm_mle_node_down(struct dlm_ctxt *dlm, struct dlm_master_list_entry *mle, diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 064ce5bbc3f6..bcaaca5112d6 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -26,16 +26,16 @@ #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" #include "dlmdomain.h" #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_RECOVERY) -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 61c51c268460..fd40c17cd022 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -25,16 +25,16 @@ #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include "../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" #include "dlmdomain.h" #define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_THREAD) -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static int dlm_thread(void *data); static void dlm_flush_asts(struct dlm_ctxt *dlm); diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 3883633e82eb..dcb17ca8ae74 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -23,15 +23,15 @@ #include #include -#include "cluster/heartbeat.h" -#include "cluster/nodemanager.h" -#include "cluster/tcp.h" +#include 
"../cluster/heartbeat.h" +#include "../cluster/nodemanager.h" +#include "../cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" #define MLOG_MASK_PREFIX ML_DLM -#include "cluster/masklog.h" +#include "../cluster/masklog.h" #define DLM_UNLOCK_FREE_LOCK 0x00000001 #define DLM_UNLOCK_CALL_AST 0x00000002 diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile index a9874e441bd4..c7895f65be0e 100644 --- a/fs/ocfs2/dlmfs/Makefile +++ b/fs/ocfs2/dlmfs/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/.. - obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o ocfs2_dlmfs-objs := userdlm.o dlmfs.o diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 4f1668c81e1f..8e4f1ace467c 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -33,11 +33,11 @@ #include -#include "stackglue.h" +#include "../stackglue.h" #include "userdlm.h" #define MLOG_MASK_PREFIX ML_DLMFS -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static const struct super_operations dlmfs_ops; diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index 525b14ddfba5..3df5be25bfb1 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -21,12 +21,12 @@ #include #include -#include "ocfs2_lockingver.h" -#include "stackglue.h" +#include "../ocfs2_lockingver.h" +#include "../stackglue.h" #include "userdlm.h" #define MLOG_MASK_PREFIX ML_DLMFS -#include "cluster/masklog.h" +#include "../cluster/masklog.h" static inline struct user_lock_res *user_lksb_to_lock_res(struct ocfs2_dlm_lksb *lksb) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9876db52913a..6cd5e4924e4d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2101,17 +2101,15 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, struct buffer_head **di_bh, int meta_level, - int overwrite_io, int write_sem, int wait) { int ret = 0; if (wait) - ret = ocfs2_inode_lock(inode, NULL, meta_level); + ret = ocfs2_inode_lock(inode, di_bh, meta_level); else - ret = ocfs2_try_inode_lock(inode, - overwrite_io ? 
NULL : di_bh, meta_level); + ret = ocfs2_try_inode_lock(inode, di_bh, meta_level); if (ret < 0) goto out; @@ -2136,6 +2134,7 @@ static int ocfs2_inode_lock_for_extent_tree(struct inode *inode, out_unlock: brelse(*di_bh); + *di_bh = NULL; ocfs2_inode_unlock(inode, meta_level); out: return ret; @@ -2177,7 +2176,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file, ret = ocfs2_inode_lock_for_extent_tree(inode, &di_bh, meta_level, - overwrite_io, write_sem, wait); if (ret < 0) { @@ -2233,13 +2231,13 @@ static int ocfs2_prepare_inode_for_write(struct file *file, &di_bh, meta_level, write_sem); + meta_level = 1; + write_sem = 1; ret = ocfs2_inode_lock_for_extent_tree(inode, &di_bh, meta_level, - overwrite_io, - 1, + write_sem, wait); - write_sem = 1; if (ret < 0) { if (ret != -EAGAIN) mlog_errno(ret); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3103ba7f97a2..bfe611ed1b1d 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -597,9 +597,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, { struct ocfs2_inode_info *oi = OCFS2_I(inode); - oi->i_sync_tid = handle->h_transaction->t_tid; - if (datasync) - oi->i_datasync_tid = handle->h_transaction->t_tid; + if (!is_handle_aborted(handle)) { + oi->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + oi->i_datasync_tid = handle->h_transaction->t_tid; + } } #endif /* OCFS2_JOURNAL_H */ diff --git a/fs/open.c b/fs/open.c index b62f5c0923a8..dcbd01611237 100644 --- a/fs/open.c +++ b/fs/open.c @@ -860,9 +860,6 @@ static int do_dentry_open(struct file *f, * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 25543a966c48..29eaa4544372 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -273,6 +273,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index e235a635d9ec..15e4fa288475 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -146,7 +146,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) struct inode *inode = file_inode(file); struct fd real; const struct cred *old_cred; - ssize_t ret; + loff_t ret; /* * The two special cases below do not need to involve real fs, diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 47a91c9733a5..7255e6a5838f 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -504,7 +504,13 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) if (err) goto fail; - WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev); + /* + * Directory inode is always on overlay st_dev. + * Non-dir with ovl_same_dev() could be on pseudo st_dev in case + * of xino bits overflow. 
+ */ + WARN_ON_ONCE(S_ISDIR(stat.mode) && + dir->d_sb->s_dev != stat.dev); ino = stat.ino; } else if (xinobits && !OVL_TYPE_UPPER(type)) { ino = ovl_remap_lower_ino(ino, xinobits, diff --git a/fs/pipe.c b/fs/pipe.c index 57502c3c0fba..5a34d6c22d4c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -108,16 +108,19 @@ void pipe_double_lock(struct pipe_inode_info *pipe1, /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { - DEFINE_WAIT(wait); + DEFINE_WAIT(rdwait); + DEFINE_WAIT(wrwait); /* * Pipes are system-local resources, so sleeping on them * is considered a noninteractive wait: */ - prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); + prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); + prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE); pipe_unlock(pipe); schedule(); - finish_wait(&pipe->wait, &wait); + finish_wait(&pipe->rd_wait, &rdwait); + finish_wait(&pipe->wr_wait, &wrwait); pipe_lock(pipe); } @@ -286,7 +289,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) size_t total_len = iov_iter_count(to); struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; - bool was_full; + bool was_full, wake_next_reader = false; ssize_t ret; /* Null read succeeds. */ @@ -344,10 +347,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (!buf->len) { pipe_buf_release(pipe, buf); - spin_lock_irq(&pipe->wait.lock); + spin_lock_irq(&pipe->rd_wait.lock); tail++; pipe->tail = tail; - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); } total_len -= chars; if (!total_len) @@ -384,7 +387,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * no data. */ if (unlikely(was_full)) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -394,18 +397,23 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * since we've done any required wakeups and there's no need * to mark anything accessed. And we've dropped the lock. */ - if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0) + if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) return -ERESTARTSYS; __pipe_lock(pipe); was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); + wake_next_reader = true; } + if (pipe_empty(pipe->head, pipe->tail)) + wake_next_reader = false; __pipe_unlock(pipe); if (was_full) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } + if (wake_next_reader) + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); if (ret > 0) file_accessed(filp); return ret; @@ -437,6 +445,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) size_t total_len = iov_iter_count(from); ssize_t chars; bool was_empty = false; + bool wake_next_writer = false; /* Null write succeeds. */ if (unlikely(total_len == 0)) @@ -515,16 +524,16 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * it, either the reader will consume it or it'll still * be there for the next write. 
*/ - spin_lock_irq(&pipe->wait.lock); + spin_lock_irq(&pipe->rd_wait.lock); head = pipe->head; if (pipe_full(head, pipe->tail, pipe->max_usage)) { - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); continue; } pipe->head = head + 1; - spin_unlock_irq(&pipe->wait.lock); + spin_unlock_irq(&pipe->rd_wait.lock); /* Insert it into the buffer array */ buf = &pipe->bufs[head & mask]; @@ -576,14 +585,17 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) */ __pipe_unlock(pipe); if (was_empty) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - wait_event_interruptible(pipe->wait, pipe_writable(pipe)); + wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); __pipe_lock(pipe); was_empty = pipe_empty(pipe->head, pipe->tail); + wake_next_writer = true; } out: + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + wake_next_writer = false; __pipe_unlock(pipe); /* @@ -596,9 +608,11 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * wake up pending jobs */ if (was_empty) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } + if (wake_next_writer) + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { int err = file_update_time(filp); if (err) @@ -642,12 +656,15 @@ pipe_poll(struct file *filp, poll_table *wait) unsigned int head, tail; /* - * Reading only -- no need for acquiring the semaphore. + * Reading pipe state only -- no need for acquiring the semaphore. * * But because this is racy, the code has to add the * entry to the poll table _first_ .. */ - poll_wait(filp, &pipe->wait, wait); + if (filp->f_mode & FMODE_READ) + poll_wait(filp, &pipe->rd_wait, wait); + if (filp->f_mode & FMODE_WRITE) + poll_wait(filp, &pipe->wr_wait, wait); /* * .. and only then can you do the racy tests. 
That way, @@ -706,7 +723,8 @@ pipe_release(struct inode *inode, struct file *file) pipe->writers--; if (pipe->readers || pipe->writers) { - wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLOUT | EPOLLRDNORM | EPOLLWRNORM | EPOLLERR | EPOLLHUP); + wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLHUP); + wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM | EPOLLERR | EPOLLHUP); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } @@ -789,7 +807,8 @@ struct pipe_inode_info *alloc_pipe_info(void) GFP_KERNEL_ACCOUNT); if (pipe->bufs) { - init_waitqueue_head(&pipe->wait); + init_waitqueue_head(&pipe->rd_wait); + init_waitqueue_head(&pipe->wr_wait); pipe->r_counter = pipe->w_counter = 1; pipe->max_usage = pipe_bufs; pipe->ring_size = pipe_bufs; @@ -1007,7 +1026,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) static void wake_up_partner(struct pipe_inode_info *pipe) { - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->rd_wait); + wake_up_interruptible(&pipe->wr_wait); } static int fifo_open(struct inode *inode, struct file *filp) @@ -1118,13 +1138,13 @@ static int fifo_open(struct inode *inode, struct file *filp) err_rd: if (!--pipe->readers) - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->wr_wait); ret = -ERESTARTSYS; goto err; err_wr: if (!--pipe->writers) - wake_up_interruptible(&pipe->wait); + wake_up_interruptible(&pipe->rd_wait); ret = -ERESTARTSYS; goto err; @@ -1251,7 +1271,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; - wake_up_interruptible_all(&pipe->wait); + wake_up_interruptible_all(&pipe->rd_wait); + wake_up_interruptible_all(&pipe->wr_wait); return pipe->max_usage * PAGE_SIZE; out_revert_acct: diff --git a/fs/proc/base.c b/fs/proc/base.c index ebea9501afb8..2ceef09504c7 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -94,6 +94,8 @@ #include #include #include +#include +#include #include #include "internal.h" #include "fd.h" @@ -477,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, seq_puts(m, "0 0 0\n"); else seq_printf(m, "%llu %llu %lu\n", - (unsigned long long)task->se.sum_exec_runtime, + (unsigned long long)tsk_seruntime(task), (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); @@ -2990,6 +2992,149 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, } #endif /* CONFIG_STACKLEAK_METRICS */ +#ifdef CONFIG_KSM +static int ksm_open(struct inode *inode, struct file *file) +{ + struct task_struct *task; + struct mm_struct *mm; + int err; + + task = get_proc_task(inode); + if (!task) { + err = -ESRCH; + goto out; + } + if (task->flags & PF_KTHREAD) { + put_task_struct(task); + err = -EINVAL; + goto out; + } + + mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + put_task_struct(task); + if (!mm) { + err = -EINVAL; + goto out; + } + if (IS_ERR(mm)) { + err = PTR_ERR(mm); + goto out; + } + + /* ensure this mm_struct can't be freed */ + mmgrab(mm); + /* but do not pin its memory */ + mmput(mm); + + err = 0; + file->private_data = mm; + +out: + return err; +} + +static ssize_t ksm_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char kbuf[PROC_NUMBUF]; + char *pos; + int behaviour; + struct mm_struct *mm = file->private_data; + int err; + int last_err; + struct vm_area_struct 
*vma; + + if (!mm) { + err = -EINVAL; + goto out; + } + + /* Only allow a very narrow range of strings to be written */ + if ((*ppos != 0) || (count >= sizeof(kbuf))) { + err = -EINVAL; + goto out; + } + + /* What was written? */ + if (copy_from_user(kbuf, buf, count)) { + err = -EFAULT; + goto out; + } + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + if (strncmp(pos, "merge", 5) == 0) { + pos += 5; + behaviour = MADV_MERGEABLE; + } + else if (strncmp(pos, "unmerge", 7) == 0) { + pos += 7; + behaviour = MADV_UNMERGEABLE; + } + else { + err = -EINVAL; + goto out; + } + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') { + err = -EINVAL; + goto out; + } + + if (!mmget_not_zero(mm)) { + err = -EINVAL; + goto out; + } + + down_write(&mm->mmap_sem); + if (!mmget_still_valid(mm)) { + err = -EINVAL; + goto skip_mm; + } + + err = 0; + + vma = mm->mmap; + while (vma) { + if (behaviour == MADV_MERGEABLE) + last_err = ksm_madvise_merge(vma->vm_mm, vma, &vma->vm_flags); + else + last_err = ksm_madvise_unmerge(vma, vma->vm_start, vma->vm_end, &vma->vm_flags); + if (last_err) + err = last_err; + vma = vma->vm_next; + } + +skip_mm: + up_write(&mm->mmap_sem); + + mmput(mm); + +out: + return err ? err : count; +} + +static int ksm_release(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = file->private_data; + + if (mm) + mmdrop(mm); + + return 0; +} + +static const struct file_operations proc_ksm_operations = { + .open = ksm_open, + .write = ksm_write, + .llseek = noop_llseek, + .release = ksm_release, +}; +#endif /* CONFIG_KSM */ + /* * Thread groups */ @@ -3097,6 +3242,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif +#ifdef CONFIG_KSM + REG("ksm", S_IRUGO|S_IWUSR, proc_ksm_operations), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/read_write.c b/fs/read_write.c index 5bbf587f5bc1..7458fccc59e1 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1777,10 +1777,9 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len, * else. Assume that the offsets have already been checked for block * alignment. * - * For deduplication we always scale down to the previous block because we - * can't meaningfully compare post-EOF contents. - * - * For clone we only link a partial EOF block above the destination file's EOF. + * For clone we only link a partial EOF block above or at the destination file's + * EOF. For deduplication we accept a partial EOF block only if it ends at the + * destination file's EOF (can not link it into the middle of a file). * * Shorten the request if possible. 
*/ @@ -1796,8 +1795,7 @@ static int generic_remap_check_len(struct inode *inode_in, if ((*len & blkmask) == 0) return 0; - if ((remap_flags & REMAP_FILE_DEDUP) || - pos_out + *len < i_size_read(inode_out)) + if (pos_out + *len < i_size_read(inode_out)) new_len &= ~blkmask; if (new_len == *len) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index da9ebe33882b..bb4973aefbb1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -2246,7 +2246,8 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th, /* also releases the path */ unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + if (inode) + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3244037b1286..a6bce5b1fb1d 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -629,6 +629,7 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); destroy_workqueue(REISERFS_SB(s)->commit_wq); + kfree(REISERFS_SB(s)->s_jdev); kfree(s->s_fs_info); s->s_fs_info = NULL; } @@ -1947,7 +1948,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); - goto error; + goto error_unlocked; } } #ifdef CONFIG_QUOTA @@ -2240,6 +2241,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) kfree(qf_names[j]); } #endif + kfree(sbi->s_jdev); kfree(sbi); s->s_fs_info = NULL; diff --git a/fs/splice.c b/fs/splice.c index 3009652a41c8..d671936d0aad 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -165,8 +165,8 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { static void wakeup_pipe_readers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->rd_wait)) + wake_up_interruptible(&pipe->rd_wait); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -462,8 +462,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); + if (waitqueue_active(&pipe->wr_wait)) + wake_up_interruptible(&pipe->wr_wait); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 0b98e3c8b461..6c0e19f7a21f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -228,6 +228,8 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, if (nm.hash) { ubifs_assert(c, fname_len(&nm) == 0); ubifs_assert(c, fname_name(&nm) == NULL); + if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) + goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); } else { diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index cd52585c8f4f..a771273fba7e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -786,7 +786,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, if (page_offset > end_index) break; - page = find_or_create_page(mapping, page_offset, ra_gfp_mask); + page = pagecache_get_page(mapping, page_offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, + ra_gfp_mask); if (!page) break; if (!PageUptodate(page)) @@ -1078,18 +1080,12 @@ static void 
do_attr_changes(struct inode *inode, const struct iattr *attr) inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - if (attr->ia_valid & ATTR_ATIME) { - inode->i_atime = timestamp_truncate(attr->ia_atime, - inode); - } - if (attr->ia_valid & ATTR_MTIME) { - inode->i_mtime = timestamp_truncate(attr->ia_mtime, - inode); - } - if (attr->ia_valid & ATTR_CTIME) { - inode->i_ctime = timestamp_truncate(attr->ia_ctime, - inode); - } + if (attr->ia_valid & ATTR_ATIME) + inode->i_atime = attr->ia_atime; + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; if (attr->ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 5dc5abca11c7..eeb1be259888 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -113,7 +113,8 @@ static int setflags(struct inode *inode, int flags) if (err) goto out_unlock; - ui->flags = ioctl2ubifs(flags); + ui->flags &= ~ioctl2ubifs(UBIFS_SUPPORTED_IOCTL_FLAGS); + ui->flags |= ioctl2ubifs(flags); ubifs_set_inode_flags(inode); inode->i_ctime = current_time(inode); release = ui->dirty; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 54d6db61106f..edf43ddd7dce 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -129,7 +129,7 @@ static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o) static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) { if (orph->del) { - dbg_gen("deleted twice ino %lu", orph->inum); + dbg_gen("deleted twice ino %lu", (unsigned long)orph->inum); return; } @@ -137,7 +137,7 @@ static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph) orph->del = 1; orph->dnext = c->orph_dnext; c->orph_dnext = orph; - dbg_gen("delete later ino %lu", orph->inum); + dbg_gen("delete later ino %lu", (unsigned long)orph->inum); return; } diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 2b7c04bf8983..17c90dff7266 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -161,7 +161,7 @@ static int create_default_filesystem(struct ubifs_info *c) sup = kzalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_KERNEL); mst = kzalloc(c->mst_node_alsz, GFP_KERNEL); idx_node_size = ubifs_idx_node_sz(c, 1); - idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL); + idx = kzalloc(ALIGN(idx_node_size, c->min_io_size), GFP_KERNEL); ino = kzalloc(ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size), GFP_KERNEL); cs = kzalloc(ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size), GFP_KERNEL); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5e1e8ec0589e..7fc2f3f07c16 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1599,6 +1599,7 @@ static int mount_ubifs(struct ubifs_info *c) vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); + kfree(c->sup_node); ubifs_debugging_exit(c); return err; } @@ -1641,6 +1642,7 @@ static void ubifs_umount(struct ubifs_info *c) vfree(c->ileb_buf); vfree(c->sbuf); kfree(c->bottom_up_buf); + kfree(c->sup_node); ubifs_debugging_exit(c); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 8c28e93e9b73..4baa1ca91e9b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1035,7 +1035,6 @@ static int check_partition_desc(struct super_block *sb, switch (le32_to_cpu(p->accessType)) { case PD_ACCESS_TYPE_READ_ONLY: case PD_ACCESS_TYPE_WRITE_ONCE: - case PD_ACCESS_TYPE_REWRITABLE: case PD_ACCESS_TYPE_NONE: goto force_ro; } @@ -2492,17 +2491,29 @@ static unsigned int udf_count_free_table(struct super_block *sb, static unsigned int udf_count_free(struct 
super_block *sb) { unsigned int accum = 0; - struct udf_sb_info *sbi; + struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; + unsigned int part = sbi->s_partition; + int ptype = sbi->s_partmaps[part].s_partition_type; + + if (ptype == UDF_METADATA_MAP25) { + part = sbi->s_partmaps[part].s_type_specific.s_metadata. + s_phys_partition_ref; + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) { + /* + * Filesystems with VAT are append-only and we cannot write to + * them. Let's just report 0 here. + */ + return 0; + } - sbi = UDF_SB(sb); if (sbi->s_lvid_bh) { struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *) sbi->s_lvid_bh->b_data; - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) { + if (le32_to_cpu(lvid->numOfPartitions) > part) { accum = le32_to_cpu( - lvid->freeSpaceTable[sbi->s_partition]); + lvid->freeSpaceTable[part]); if (accum == 0xFFFFFFFF) accum = 0; } @@ -2511,7 +2522,7 @@ static unsigned int udf_count_free(struct super_block *sb) if (accum) return accum; - map = &sbi->s_partmaps[sbi->s_partition]; + map = &sbi->s_partmaps[part]; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { accum += udf_count_free_bitmap(sb, map->s_uspace.s_bitmap); diff --git a/fs/utimes.c b/fs/utimes.c index c952b6b3d8a0..1d17ce98cb80 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -36,14 +36,14 @@ static int utimes_common(const struct path *path, struct timespec64 *times) if (times[0].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_ATIME; else if (times[0].tv_nsec != UTIME_NOW) { - newattrs.ia_atime = timestamp_truncate(times[0], inode); + newattrs.ia_atime = times[0]; newattrs.ia_valid |= ATTR_ATIME_SET; } if (times[1].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_MTIME; else if (times[1].tv_nsec != UTIME_NOW) { - newattrs.ia_mtime = timestamp_truncate(times[1], inode); + newattrs.ia_mtime = times[1]; newattrs.ia_valid |= ATTR_MTIME_SET; } /* diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 94badfa1743e..91c2cb14276e 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -26,7 +26,7 @@ struct xfs_attr_list_context; *========================================================================*/ -#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ +#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */ #define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ #define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */ #define ATTR_SECURE 0x0008 /* use attrs in security namespace */ @@ -37,7 +37,10 @@ struct xfs_attr_list_context; #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ #define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */ -#define ATTR_ALLOC 0x8000 /* allocate xattr buffer on demand */ +#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */ + +#define ATTR_KERNEL_FLAGS \ + (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC) #define XFS_ATTR_FLAGS \ { ATTR_DONTFOLLOW, "DONTFOLLOW" }, \ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 7b35d62ede9f..edfbdb8f85e2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -462,6 +462,8 @@ xfs_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; + ops[i].am_error = strncpy_from_user((char *)attr_name, ops[i].am_attrname, MAXNAMELEN); if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 
c4c4f09113d3..bd9d9ebf85d8 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -450,6 +450,8 @@ xfs_compat_attrmulti_by_handle( error = 0; for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_flags &= ~ATTR_KERNEL_FLAGS; + ops[i].am_error = strncpy_from_user((char *)attr_name, compat_ptr(ops[i].am_attrname), MAXNAMELEN); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 18790b9e16b5..546e6adfeced 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -752,6 +752,8 @@ ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u3 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_gpe_status_set(void)) +ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_any_fixed_event_status_set(void)) ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_get_gpe_device(u32 gpe_index, diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 2f3f28c7cea3..9373662cdb44 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -532,11 +532,12 @@ typedef u64 acpi_integer; strnlen (a, ACPI_NAMESEG_SIZE) == ACPI_NAMESEG_SIZE) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h index 8874f681b056..59eb72bf7d5f 100644 --- a/include/asm-generic/resource.h +++ b/include/asm-generic/resource.h @@ -23,7 +23,7 @@ [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ [RLIMIT_SIGPENDING] = { 0, 0 }, \ [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ - [RLIMIT_NICE] = { 0, 0 }, \ + [RLIMIT_NICE] = { 30, 30 }, \ [RLIMIT_RTPRIO] = { 0, 0 }, \ [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 2b10036fefd0..9e22ac369d1d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -137,13 +137,6 @@ * When used, an architecture is expected to provide __tlb_remove_table() * which does the actual freeing of these pages. * - * HAVE_RCU_TABLE_NO_INVALIDATE - * - * This makes HAVE_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before - * freeing the page-table pages. This can be avoided if you use - * HAVE_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux - * page-tables natively. - * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). @@ -189,8 +182,23 @@ struct mmu_table_batch { extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +/* + * This allows an architecture that does not use the linux page-tables for + * hardware to skip the TLBI when freeing page tables. 
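As a sketch of how an architecture would use this opt-out (not part of this hunk): the macro is defined in the arch's asm/tlb.h before asm-generic/tlb.h is included, and evaluates to false whenever the hardware cannot be holding stale references to the page tables being freed. The predicate below is hypothetical; powerpc, for instance, is expected to key this off whether the radix MMU is in use.

    /* Hypothetical arch/<arch>/include/asm/tlb.h override (sketch only):
     * skip the pre-free TLB invalidate when the hardware never walks
     * the Linux page tables in the current mode.
     */
    #define tlb_needs_table_invalidate() (!arch_hw_walks_linux_page_tables())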
+ */ +#ifndef tlb_needs_table_invalidate +#define tlb_needs_table_invalidate() (true) +#endif + +#else + +#ifdef tlb_needs_table_invalidate +#error tlb_needs_table_invalidate() requires HAVE_RCU_TABLE_FREE #endif +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ + + #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER /* * If we can't allocate a page to make a big batch of page pointers diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h index ce4103208619..cec543d9e87b 100644 --- a/include/asm-generic/vdso/vsyscall.h +++ b/include/asm-generic/vdso/vsyscall.h @@ -12,9 +12,9 @@ static __always_inline struct vdso_data *__arch_get_k_vdso_data(void) #endif /* __arch_get_k_vdso_data */ #ifndef __arch_update_vdso_data -static __always_inline int __arch_update_vdso_data(void) +static __always_inline bool __arch_update_vdso_data(void) { - return 0; + return true; } #endif /* __arch_update_vdso_data */ diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index e34a7b7f848a..294b2931c4cc 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -96,6 +96,11 @@ struct drm_gem_shmem_object { * The address are un-mapped when the count reaches zero. */ unsigned int vmap_use_count; + + /** + * @map_cached: map object cached (instead of using writecombine). + */ + bool map_cached; }; #define to_drm_gem_shmem_obj(obj) \ diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h index 0f2b8423ce1d..65ac6eb6c733 100644 --- a/include/dt-bindings/clock/imx8mn-clock.h +++ b/include/dt-bindings/clock/imx8mn-clock.h @@ -122,8 +122,8 @@ #define IMX8MN_CLK_I2C1 105 #define IMX8MN_CLK_I2C2 106 #define IMX8MN_CLK_I2C3 107 -#define IMX8MN_CLK_I2C4 118 -#define IMX8MN_CLK_UART1 119 +#define IMX8MN_CLK_I2C4 108 +#define IMX8MN_CLK_UART1 109 #define IMX8MN_CLK_UART2 110 #define IMX8MN_CLK_UART3 111 #define IMX8MN_CLK_UART4 112 diff --git a/include/dt-bindings/clock/qcom,gcc-msm8998.h b/include/dt-bindings/clock/qcom,gcc-msm8998.h index de1d8a1f5966..63e02dc32a0b 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8998.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8998.h @@ -182,6 +182,7 @@ #define GCC_MSS_GPLL0_DIV_CLK_SRC 173 #define GCC_MSS_SNOC_AXI_CLK 174 #define GCC_MSS_MNOC_BIMC_AXI_CLK 175 +#define GCC_BIMC_GFX_CLK 176 #define PCIE_0_GDSC 0 #define UFS_GDSC 1 diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 97967ce06de3..f88197c1ffc2 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -504,4 +505,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << WB_async_congested)); } +extern const char *bdi_unknown_name; + +static inline const char *bdi_dev_name(struct backing_dev_info *bdi) +{ + if (!bdi || !bdi->dev) + return bdi_unknown_name; + return dev_name(bdi->dev); +} + #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4c636c42ad68..1cb5afed5515 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -524,7 +524,7 @@ struct request_queue { unsigned int sg_reserved_size; int node; #ifdef CONFIG_BLK_DEV_IO_TRACE - struct blk_trace *blk_trace; + struct blk_trace __rcu *blk_trace; struct mutex blk_trace_mutex; #endif /* diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7bb2d8de9f30..3b6ff5902edc 100644 --- a/include/linux/blktrace_api.h +++ 
b/include/linux/blktrace_api.h @@ -51,9 +51,13 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f **/ #define blk_add_cgroup_trace_msg(q, cg, fmt, ...) \ do { \ - struct blk_trace *bt = (q)->blk_trace; \ + struct blk_trace *bt; \ + \ + rcu_read_lock(); \ + bt = rcu_dereference((q)->blk_trace); \ if (unlikely(bt)) \ __trace_note_message(bt, cg, fmt, ##__VA_ARGS__);\ + rcu_read_unlock(); \ } while (0) #define blk_add_trace_msg(q, fmt, ...) \ blk_add_cgroup_trace_msg(q, NULL, fmt, ##__VA_ARGS__) @@ -61,10 +65,14 @@ void __trace_note_message(struct blk_trace *, struct blkcg *blkcg, const char *f static inline bool blk_trace_note_message_enabled(struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; - if (likely(!bt)) - return false; - return bt->act_mask & BLK_TC_NOTIFY; + struct blk_trace *bt; + bool ret; + + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + ret = bt && (bt->act_mask & BLK_TC_NOTIFY); + rcu_read_unlock(); + return ret; } extern void blk_add_driver_data(struct request_queue *q, struct request *rq, diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d7ddebd0cdec..e75d2191226b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -62,6 +62,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/include/linux/compat.h b/include/linux/compat.h index 68f79d855c3d..11083d84eb23 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -958,4 +958,22 @@ static inline bool in_compat_syscall(void) { return false; } #endif /* CONFIG_COMPAT */ +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ +#ifndef compat_ptr +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)uptr; +} +#endif + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 31b1b0e03df8..018dce868de6 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -148,6 +148,20 @@ struct cpufreq_policy { struct notifier_block nb_max; }; +/* + * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() + * callback for sanitization. That callback is only expected to modify the min + * and max values, if necessary, and specifically it must not update the + * frequency table. 
+ */ +struct cpufreq_policy_data { + struct cpufreq_cpuinfo cpuinfo; + struct cpufreq_frequency_table *freq_table; + unsigned int cpu; + unsigned int min; /* in kHz */ + unsigned int max; /* in kHz */ +}; + struct cpufreq_freqs { struct cpufreq_policy *policy; unsigned int old; @@ -201,8 +215,6 @@ u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); void cpufreq_cpu_release(struct cpufreq_policy *policy); int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); -int cpufreq_set_policy(struct cpufreq_policy *policy, - struct cpufreq_policy *new_policy); void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); @@ -284,7 +296,7 @@ struct cpufreq_driver { /* needed by all drivers */ int (*init)(struct cpufreq_policy *policy); - int (*verify)(struct cpufreq_policy *policy); + int (*verify)(struct cpufreq_policy_data *policy); /* define one out of two */ int (*setpolicy)(struct cpufreq_policy *policy); @@ -415,8 +427,9 @@ static inline int cpufreq_thermal_control_enabled(struct cpufreq_driver *drv) (drv->flags & CPUFREQ_IS_COOLING_DEV); } -static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, - unsigned int min, unsigned int max) +static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *policy, + unsigned int min, + unsigned int max) { if (policy->min < min) policy->min = min; @@ -432,10 +445,10 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, } static inline void -cpufreq_verify_within_cpu_limits(struct cpufreq_policy *policy) +cpufreq_verify_within_cpu_limits(struct cpufreq_policy_data *policy) { cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); + policy->cpuinfo.max_freq); } #ifdef CONFIG_CPU_FREQ @@ -513,6 +526,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, * CPUFREQ GOVERNORS * *********************************************************************/ +#define CPUFREQ_POLICY_UNKNOWN (0) /* * If (cpufreq_driver->target) exists, the ->governor decides what frequency * within the limits is used. 
If (cpufreq_driver->setpolicy> exists, these @@ -684,9 +698,9 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev, int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); -int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, +int cpufreq_frequency_table_verify(struct cpufreq_policy_data *policy, struct cpufreq_frequency_table *table); -int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); +int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy); int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, unsigned int target_freq, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e51ee772b9f5..def48a583670 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -59,6 +59,7 @@ enum cpuhp_state { CPUHP_IOMMU_INTEL_DEAD, CPUHP_LUSTRE_CFS_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, + CPUHP_PADATA_DEAD, CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index fb376b5b7281..95816a8e3d26 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -177,8 +177,8 @@ struct devfreq { /* information for device frequency transition */ unsigned int total_trans; unsigned int *trans_table; - unsigned long *time_in_state; - unsigned long last_stat_updated; + u64 *time_in_state; + u64 last_stat_updated; struct srcu_notifier_head transition_notifier_list; diff --git a/include/linux/device.h b/include/linux/device.h index 96ff76731e93..50d97767d8d6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1522,6 +1522,17 @@ static inline struct device_node *dev_of_node(struct device *dev) void driver_init(void); +static inline bool dev_has_sync_state(struct device *dev) +{ + if (!dev) + return false; + if (dev->driver && dev->driver->sync_state) + return true; + if (dev->bus && dev->bus->sync_state) + return true; + return false; +} + /* * High level routines for use by the bus drivers */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index dad4a68fa009..8013562751a5 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -674,6 +674,7 @@ struct dma_filter { * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api + * @owner: owner module (automatically set based on the provided dev) * @src_addr_widths: bit mask of src addr widths the device supports * Width is specified in bytes, e.g. for a device supporting * a width of 4 the mask should have BIT(4) set. 
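Under the new ->verify() signature running through the hunks above, a driver sanitizes a struct cpufreq_policy_data rather than the full policy. A minimal sketch of such a callback for a driver without a frequency table (the function name is illustrative); table-based drivers would instead point ->verify at cpufreq_generic_frequency_table_verify():

    /* Sketch: clamp the requested min/max against the CPU's hard limits. */
    #include <linux/cpufreq.h>

    static int example_cpufreq_verify(struct cpufreq_policy_data *policy)
    {
            cpufreq_verify_within_cpu_limits(policy);
            return 0;
    }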
@@ -737,6 +738,7 @@ struct dma_device { int dev_id; struct device *dev; + struct module *owner; u32 src_addr_widths; u32 dst_addr_widths; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f64ca27dc210..712be8bc6a7c 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -69,8 +69,9 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) +#define for_each_drhd_unit(drhd) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) #define for_each_active_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ @@ -81,7 +82,8 @@ extern struct list_head dmar_drhd_units; if (i=drhd->iommu, drhd->ignored) {} else #define for_each_iommu(i, drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (i=drhd->iommu, 0) {} else static inline bool dmar_rcu_check(void) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index ffcc7724ca21..dc4fd8a6644d 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include /* * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining @@ -40,6 +42,13 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); +DECLARE_PER_CPU(int, eventfd_wake_count); + +static inline bool eventfd_signal_count(void) +{ + return this_cpu_read(eventfd_wake_count); +} + #else /* CONFIG_EVENTFD */ /* @@ -68,6 +77,11 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, return -ENOSYS; } +static inline bool eventfd_signal_count(void) +{ + return false; +} + #endif #endif /* _LINUX_EVENTFD_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..59e8fe62afe6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -698,6 +698,7 @@ struct inode { struct rcu_head i_rcu; }; atomic64_t i_version; + atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; diff --git a/include/linux/futex.h b/include/linux/futex.h index 5cc3fed27d4c..b70df27d7e85 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -31,23 +31,26 @@ struct task_struct; union futex_key { struct { + u64 i_seq; unsigned long pgoff; - struct inode *inode; - int offset; + unsigned int offset; } shared; struct { + union { + struct mm_struct *mm; + u64 __tmp; + }; unsigned long address; - struct mm_struct *mm; - int offset; + unsigned int offset; } private; struct { + u64 ptr; unsigned long word; - void *ptr; - int offset; + unsigned int offset; } both; }; -#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } #ifdef CONFIG_FUTEX enum { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ea4c133b4139..0556153a9ad6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -245,18 +245,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk) !(disk->flags & GENHD_FL_NO_PART_SCAN); } -static inline bool disk_has_partitions(struct gendisk *disk) -{ - bool ret = false; - - rcu_read_lock(); - if (rcu_dereference(disk->part_tbl)->len > 1) - ret = true; - rcu_read_unlock(); - - return ret; -} - static inline dev_t disk_devt(struct gendisk *disk) 
{ return MKDEV(disk->major, disk->first_minor); @@ -298,6 +286,7 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); +bool disk_has_partitions(struct gendisk *disk); /* * Macros to operate on percpu disk statistics: diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 5215fdba6b9a..bf2d017dd7b7 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -158,6 +158,7 @@ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce); int gpiod_set_transitory(struct gpio_desc *desc, bool transitory); +void gpiod_toggle_active_low(struct gpio_desc *desc); int gpiod_is_active_low(const struct gpio_desc *desc); int gpiod_cansleep(const struct gpio_desc *desc); @@ -483,6 +484,12 @@ static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) return -ENOSYS; } +static inline void gpiod_toggle_active_low(struct gpio_desc *desc) +{ + /* GPIO can never have been requested */ + WARN_ON(desc); +} + static inline int gpiod_is_active_low(const struct gpio_desc *desc) { /* GPIO can never have been requested */ diff --git a/include/linux/hid.h b/include/linux/hid.h index cd41f209043f..875f71132b14 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -492,7 +492,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index ef1cbb5f454f..93338fd54af8 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -31,6 +31,12 @@ static inline void icmpv6_send(struct sk_buff *skb, } #endif +#if IS_ENABLED(CONFIG_NF_NAT) +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); +#else +#define icmpv6_ndo_send icmpv6_send +#endif + extern int icmpv6_init(void); extern int icmpv6_err_convert(u8 type, u8 code, int *err); diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..c91cf2dee12a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include #include -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 94f047a8a845..d7c403d0dd27 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -122,7 +122,7 @@ static inline int 
intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } diff --git a/include/linux/irq.h b/include/linux/irq.h index 7853eb9301f2..3ed5a055b5f4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -209,6 +209,8 @@ struct irq_data { * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set * IRQD_CAN_RESERVE - Can use reservation mode + * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change + * required */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -231,6 +233,7 @@ enum { IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), IRQD_CAN_RESERVE = (1 << 26), + IRQD_MSI_NOMASK_QUIRK = (1 << 27), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -390,6 +393,21 @@ static inline bool irqd_can_reserve(struct irq_data *d) return __irqd_to_state(d) & IRQD_CAN_RESERVE; } +static inline void irqd_set_msi_nomask_quirk(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_MSI_NOMASK_QUIRK; +} + +static inline void irqd_clr_msi_nomask_quirk(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_MSI_NOMASK_QUIRK; +} + +static inline bool irqd_msi_nomask_quirk(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_MSI_NOMASK_QUIRK; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 3c340dbc5a1f..6b7b35b5394e 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,7 +192,7 @@ enum { IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), /* Irq domain name was allocated in __irq_domain_add() */ - IRQ_DOMAIN_NAME_ALLOCATED = (1 << 6), + IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), @@ -206,6 +206,13 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* + * Quirk to handle MSI implementations which do not provide + * masking. Currently known to affect x86, but partially + * handled in core code. + */ + IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1b6d31da7cbc..dea181bdb1dd 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -171,7 +171,7 @@ static inline u64 get_jiffies_64(void) * Have the 32 bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. 
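As a worked example of the new wrap point, assuming HZ=1000: the initial value becomes (unsigned int)(-10 * 1000) = 2^32 - 10000, so the 32-bit jiffies counter now overflows 10000 ticks, i.e. ten seconds, after boot; the previous -300*HZ value is what gave the five-minute window described in the comment above.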
*/ -#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) +#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ)) /* * Change timeval to jiffies, trying to avoid the diff --git a/include/linux/ksm.h b/include/linux/ksm.h index e48b1e453ff5..a91a7cfc87a1 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -19,6 +19,10 @@ struct stable_node; struct mem_cgroup; #ifdef CONFIG_KSM +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags); +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags); int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); int __ksm_enter(struct mm_struct *mm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 538c25e778c0..eacb8c48e768 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -204,7 +204,7 @@ struct kvm_async_pf { struct list_head queue; struct kvm_vcpu *vcpu; struct mm_struct *mm; - gva_t gva; + gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; bool wakeup_all; @@ -212,8 +212,8 @@ struct kvm_async_pf { void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, - struct kvm_arch_async_pf *arch); +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif @@ -723,6 +723,7 @@ void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); void kvm_get_pfn(kvm_pfn_t pfn); +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, @@ -767,7 +768,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); @@ -775,8 +776,12 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); +int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool atomic); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); +int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool dirty, bool atomic); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, diff --git a/include/linux/kvm_types.h 
b/include/linux/kvm_types.h index 1c88e69db3d9..68e84cf42a3f 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -18,7 +18,7 @@ struct kvm_memslots; enum kvm_mr_change; -#include +#include /* * Address types: @@ -51,4 +51,11 @@ struct gfn_to_hva_cache { struct kvm_memory_slot *memslot; }; +struct gfn_to_pfn_cache { + u64 generation; + gfn_t gfn; + kvm_pfn_t pfn; + bool dirty; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/include/linux/libata.h b/include/linux/libata.h index 2dbde119721d..bff539918d82 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1221,6 +1221,7 @@ struct pci_bits { }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); +extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 3ef96743db8d..1ecd35664e0d 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h index 1013e60c5b25..b0109ee6dae2 100644 --- a/include/linux/mfd/rohm-bd70528.h +++ b/include/linux/mfd/rohm-bd70528.h @@ -317,7 +317,7 @@ enum { #define BD70528_MASK_RTC_MINUTE 0x7f #define BD70528_MASK_RTC_HOUR_24H 0x80 #define BD70528_MASK_RTC_HOUR_PM 0x20 -#define BD70528_MASK_RTC_HOUR 0x1f +#define BD70528_MASK_RTC_HOUR 0x3f #define BD70528_MASK_RTC_DAY 0x3f #define BD70528_MASK_RTC_WEEK 0x07 #define BD70528_MASK_RTC_MONTH 0x1f diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27200dea0297..a24937fc56b9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -461,6 +461,11 @@ struct mlx5_vf_context { int enabled; u64 port_guid; u64 node_guid; + /* Valid bits are used to validate administrative guid only. 
+ * Enabled after ndo_set_vf_guid + */ + u8 port_guid_valid:1; + u8 node_guid_valid:1; enum port_state_policy policy; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5d54fccf87fc..56f124a6eccb 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1418,14 +1418,15 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_440[0x20]; - u8 tls[0x1]; - u8 reserved_at_461[0x2]; + u8 reserved_at_460[0x3]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x3]; u8 log_max_umem[0x5]; u8 max_num_eqs[0x10]; - u8 reserved_at_480[0x3]; + u8 reserved_at_480[0x1]; + u8 tls_tx[0x1]; + u8 reserved_at_482[0x1]; u8 log_max_l2_table[0x5]; u8 reserved_at_488[0x8]; u8 log_uar_page_sz[0x10]; diff --git a/include/linux/mm.h b/include/linux/mm.h index cfaa8feecfe8..70f4278bb193 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2687,6 +2687,10 @@ static inline bool debug_pagealloc_enabled_static(void) #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP) extern void __kernel_map_pages(struct page *page, int numpages, int enable); +/* + * When called in DEBUG_PAGEALLOC context, the call should most likely be + * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static() + */ static inline void kernel_map_pages(struct page *page, int numpages, int enable) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cac56fb59af8..1dabd86b232a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -72,6 +72,8 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ +#define MAX_NEST_DEV 8 + /* * Transmit return codes: transmit return codes originate from three different * namespaces: @@ -4323,11 +4325,8 @@ void *netdev_lower_get_next(struct net_device *dev, ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) -struct net_device *netdev_all_lower_get_next(struct net_device *dev, +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); -struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, - struct list_head **iter); - int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, void *data), diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 908d38dbcb91..5448c8b443db 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -121,6 +121,7 @@ struct ip_set_ext { u32 timeout; u8 packets_op; u8 bytes_op; + bool target; }; struct ip_set; @@ -187,6 +188,14 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Region-locking is used */ + bool region_lock; +}; + +struct ip_set_region { + spinlock_t lock; /* Region lock */ + size_t ext_size; /* Size of the dynamic extensions */ + u32 elements; /* Number of elements vs timeout */ }; /* The core set type structure */ @@ -501,7 +510,7 @@ ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo, } #define IP_SET_INIT_KEXT(skb, opt, set) \ - { .bytes = (skb)->len, .packets = 1, \ + { .bytes = (skb)->len, .packets = 1, .target = true,\ .timeout = ip_set_adt_opt_timeout(opt, set) } #define IP_SET_INIT_UEXT(set) \ diff --git a/include/linux/padata.h b/include/linux/padata.h index 23717eeaad23..cccab7a59787 100644 --- a/include/linux/padata.h +++ 
b/include/linux/padata.h @@ -9,6 +9,7 @@ #ifndef PADATA_H #define PADATA_H +#include #include #include #include @@ -98,7 +99,7 @@ struct padata_cpumask { * struct parallel_data - Internal control structure, covers everything * that depends on the cpumask in use. * - * @pinst: padata instance. + * @sh: padata_shell object. * @pqueue: percpu padata queues used for parallelization. * @squeue: percpu padata queues used for serialuzation. * @reorder_objects: Number of objects waiting in the reorder queues. @@ -111,7 +112,7 @@ struct padata_cpumask { * @lock: Reorder lock. */ struct parallel_data { - struct padata_instance *pinst; + struct padata_shell *ps; struct padata_parallel_queue __percpu *pqueue; struct padata_serial_queue __percpu *squeue; atomic_t reorder_objects; @@ -124,14 +125,33 @@ struct parallel_data { spinlock_t lock ____cacheline_aligned; }; +/** + * struct padata_shell - Wrapper around struct parallel_data, its + * purpose is to allow the underlying control structure to be replaced + * on the fly using RCU. + * + * @pinst: padat instance. + * @pd: Actual parallel_data structure which may be substituted on the fly. + * @opd: Pointer to old pd to be freed by padata_replace. + * @list: List entry in padata_instance list. + */ +struct padata_shell { + struct padata_instance *pinst; + struct parallel_data __rcu *pd; + struct parallel_data *opd; + struct list_head list; +}; + /** * struct padata_instance - The overall control structure. * * @cpu_notifier: cpu hotplug notifier. * @parallel_wq: The workqueue used for parallel work. * @serial_wq: The workqueue used for serial work. - * @pd: The internal control structure. + * @pslist: List of padata_shell objects attached to this instance. * @cpumask: User supplied cpumasks for parallel and serial works. + * @rcpumask: Actual cpumasks based on user cpumask and cpu_online_mask. + * @omask: Temporary storage used to compute the notification mask. * @cpumask_change_notifier: Notifiers chain for user-defined notify * callbacks that will be called when either @pcpu or @cbcpu * or both cpumasks change. 
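Illustrative use of the new shell indirection (a sketch; the names and error handling are not taken from this patch): a client allocates one padata_shell per logical user of an instance and submits work through it, so the instance can swap the underlying parallel_data without disturbing submitters.

    /* Sketch: one shell per user of a padata instance. */
    #include <linux/errno.h>
    #include <linux/padata.h>

    static struct padata_shell *example_ps;

    static int example_setup(struct padata_instance *pinst)
    {
            example_ps = padata_alloc_shell(pinst);
            return example_ps ? 0 : -ENOMEM;
    }

    static int example_submit(struct padata_priv *padata, int *cb_cpu)
    {
            /* Queue work through the shell instead of the instance. */
            return padata_do_parallel(example_ps, padata, cb_cpu);
    }

    static void example_teardown(void)
    {
            padata_free_shell(example_ps);
    }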
@@ -143,8 +163,10 @@ struct padata_instance { struct hlist_node node; struct workqueue_struct *parallel_wq; struct workqueue_struct *serial_wq; - struct parallel_data *pd; + struct list_head pslist; struct padata_cpumask cpumask; + struct padata_cpumask rcpumask; + cpumask_var_t omask; struct blocking_notifier_head cpumask_change_notifier; struct kobject kobj; struct mutex lock; @@ -156,7 +178,9 @@ struct padata_instance { extern struct padata_instance *padata_alloc_possible(const char *name); extern void padata_free(struct padata_instance *pinst); -extern int padata_do_parallel(struct padata_instance *pinst, +extern struct padata_shell *padata_alloc_shell(struct padata_instance *pinst); +extern void padata_free_shell(struct padata_shell *ps); +extern int padata_do_parallel(struct padata_shell *ps, struct padata_priv *padata, int *cb_cpu); extern void padata_do_serial(struct padata_priv *padata); extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type, diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1bf83c8fcaa7..77de28bfefb0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -311,7 +311,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) -PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) +PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) __SETPAGEFLAG(Referenced, referenced, PF_HEAD) diff --git a/include/linux/pci.h b/include/linux/pci.h index c393dff2d66f..930fab293073 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2310,7 +2310,7 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) } #endif -void pci_add_dma_alias(struct pci_dev *dev, u8 devfn); +void pci_add_dma_alias(struct pci_dev *dev, u8 devfn_from, unsigned nr_devfns); bool pci_devs_are_dma_aliases(struct pci_dev *dev1, struct pci_dev *dev2); int pci_for_each_dma_alias(struct pci_dev *pdev, int (*fn)(struct pci_dev *pdev, diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index a6fabd865211..176bfbd52d97 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -175,8 +175,7 @@ * Declaration/definition used for per-CPU variables that should be accessed * as decrypted when memory encryption is enabled in the guest. */ -#if defined(CONFIG_VIRTUALIZATION) && defined(CONFIG_AMD_MEM_ENCRYPT) - +#ifdef CONFIG_AMD_MEM_ENCRYPT #define DECLARE_PER_CPU_DECRYPTED(type, name) \ DECLARE_PER_CPU_SECTION(type, name, "..decrypted") diff --git a/include/linux/phy.h b/include/linux/phy.h index dd4a91f1feaa..b5e21c3a22d0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -338,6 +338,7 @@ struct phy_c45_device_ids { * is_gigabit_capable: Set to true if PHY supports 1000Mbps * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. 
* state: state of the PHY for management purposes @@ -376,6 +377,7 @@ struct phy_device { unsigned is_gigabit_capable:1; unsigned has_fixups:1; unsigned suspended:1; + unsigned suspended_by_mdio_bus:1; unsigned sysfs_links:1; unsigned loopback_enabled:1; @@ -531,6 +533,7 @@ struct phy_driver { /* * Checks if the PHY generated an interrupt. * For multi-PHY devices with shared PHY interrupt pin + * Set interrupt bits have to be cleared. */ int (*did_interrupt)(struct phy_device *phydev); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index dbcfa6892384..d5765039652a 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -47,7 +47,7 @@ struct pipe_buffer { **/ struct pipe_inode_info { struct mutex mutex; - wait_queue_head_t wait; + wait_queue_head_t rd_wait, wr_wait; unsigned int head; unsigned int tail; unsigned int max_usage; diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 8cfe570fdece..2cbde6542849 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -49,6 +49,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_CLKDM_NOAUTO BIT(21) #define SYSC_QUIRK_FORCE_MSTANDBY BIT(20) #define SYSC_MODULE_QUIRK_AESS BIT(19) #define SYSC_MODULE_QUIRK_SGX BIT(18) diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 276a03c24691..041bfa412aa0 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -24,7 +24,7 @@ struct platform_device { int id; bool id_auto; struct device dev; - u64 dma_mask; + u64 platform_dma_mask; u32 num_resources; struct resource *resource; diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 0832c9b66852..e0ddb47f4402 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -27,7 +27,6 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; #include #include -#include #include #include #include @@ -59,7 +58,9 @@ extern const char raid6_empty_zero_page[PAGE_SIZE]; #define enable_kernel_altivec() #define disable_kernel_altivec() +#undef EXPORT_SYMBOL #define EXPORT_SYMBOL(sym) +#undef EXPORT_SYMBOL_GPL #define EXPORT_SYMBOL_GPL(sym) #define MODULE_LICENSE(licence) #define MODULE_DESCRIPTION(desc) diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 61974c4c566b..90f2e2232c6d 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -34,7 +34,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -94,10 +94,10 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } /** diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 337a46391527..6a92fd3105a3 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -287,6 +287,8 @@ void regulator_bulk_set_supply_names(struct 
regulator_bulk_data *consumers, const char *const *supply_names, unsigned int num_supplies); +bool regulator_is_equal(struct regulator *reg1, struct regulator *reg2); + #else /* @@ -593,6 +595,11 @@ regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers, { } +static inline bool +regulator_is_equal(struct regulator *reg1, struct regulator *reg2) +{ + return false; +} #endif static inline int regulator_set_voltage_triplet(struct regulator *regulator, diff --git a/include/linux/sched.h b/include/linux/sched.h index 716ad1d8d95e..9d08ce1d6e6c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -649,13 +649,18 @@ struct task_struct { unsigned int flags; unsigned int ptrace; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BMQ) struct llist_node wake_entry; +#endif +#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_BMQ) int on_cpu; +#endif +#ifdef CONFIG_SMP #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ unsigned int cpu; #endif +#ifndef CONFIG_SCHED_BMQ unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; struct task_struct *last_wakee; @@ -669,6 +674,7 @@ struct task_struct { */ int recent_used_cpu; int wake_cpu; +#endif /* !CONFIG_SCHED_BMQ */ #endif int on_rq; @@ -677,13 +683,23 @@ struct task_struct { int normal_prio; unsigned int rt_priority; +#ifdef CONFIG_SCHED_BMQ + u64 last_ran; + s64 time_slice; + int boost_prio; + int bmq_idx; + struct list_head bmq_node; + /* sched_clock time spent running */ + u64 sched_time; +#else /* !CONFIG_SCHED_BMQ */ const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; + struct sched_dl_entity dl; +#endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; #ifdef CONFIG_UCLAMP_TASK /* Clamp values requested for a scheduling entity */ @@ -1298,6 +1314,15 @@ struct task_struct { */ }; +#ifdef CONFIG_SCHED_BMQ +#define tsk_seruntime(t) ((t)->sched_time) +/* replace the uncertain rt_timeout with 0UL */ +#define tsk_rttimeout(t) (0UL) +#else /* CFS */ +#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) +#define tsk_rttimeout(t) ((t)->rt.timeout) +#endif /* !CONFIG_SCHED_BMQ */ + static inline struct pid *task_pid(struct task_struct *task) { return task->thread_pid; diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 1aff00b65f3c..02a3c5d34ee4 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -1,5 +1,22 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_SCHED_BMQ + +#define __tsk_deadline(p) (0UL) + +static inline int dl_prio(int prio) +{ + return 0; +} + +static inline int dl_task(struct task_struct *p) +{ + return (SCHED_NORMAL == p->policy); +} +#else + +#define __tsk_deadline(p) ((p)->dl.deadline) + /* * SCHED_DEADLINE tasks has negative priorities, reflecting * the fact that any of them has higher prio than RT and @@ -19,6 +36,7 @@ static inline int dl_task(struct task_struct *p) { return dl_prio(p->prio); } +#endif /* CONFIG_SCHED_BMQ */ static inline bool dl_time_before(u64 a, u64 b) { diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h index 1abe91ff6e4a..6d67e9a5af6b 100644 --- a/include/linux/sched/nohz.h +++ b/include/linux/sched/nohz.h @@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { } #ifdef CONFIG_NO_HZ_COMMON void calc_load_nohz_start(void); +void calc_load_nohz_remote(struct rq *rq); void calc_load_nohz_stop(void); #else static inline void calc_load_nohz_start(void) { } +static inline void
calc_load_nohz_remote(struct rq *rq) { } static inline void calc_load_nohz_stop(void) { } #endif /* CONFIG_NO_HZ_COMMON */ diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 7d64feafc408..d9dc5d3ccd2e 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -20,11 +20,17 @@ */ #define MAX_USER_RT_PRIO 100 + #define MAX_RT_PRIO MAX_USER_RT_PRIO #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) +#ifdef CONFIG_SCHED_BMQ +/* +/- priority levels from the base priority */ +#define MAX_PRIORITY_ADJ 4 +#endif + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index e5af028c08b4..6387c8ea9832 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) if (policy == SCHED_FIFO || policy == SCHED_RR) return true; +#ifndef CONFIG_SCHED_BMQ if (policy == SCHED_DEADLINE) return true; +#endif return false; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9133bcf0544..64e5b1be9ff5 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1478,6 +1478,11 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb) skb->next = NULL; } +/* Iterate through singly-linked GSO fragments of an skb. */ +#define skb_list_walk_safe(first, skb, next) \ + for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next), (next) = (skb) ? (skb)->next : NULL) + static inline void skb_list_del_init(struct sk_buff *skb) { __list_del_entry(&skb->list); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 6fc8843f1c9e..cd97d2c8840c 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -191,7 +191,7 @@ struct platform_s2idle_ops { int (*begin)(void); int (*prepare)(void); int (*prepare_late)(void); - void (*wake)(void); + bool (*wake)(void); void (*restore_early)(void); void (*restore)(void); void (*end)(void); diff --git a/include/linux/tty.h b/include/linux/tty.h index bfa4e2ee94a9..bd5fe0e907e8 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -225,6 +225,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ diff --git a/include/linux/usb/irda.h b/include/linux/usb/irda.h index 396d2b043e64..556a801efce3 100644 --- a/include/linux/usb/irda.h +++ b/include/linux/usb/irda.h @@ -119,11 +119,22 @@ struct usb_irda_cs_descriptor { * 6 - 115200 bps * 7 - 576000 bps * 8 - 1.152 Mbps - * 9 - 5 mbps + * 9 - 4 Mbps * 10..15 - Reserved */ #define USB_IRDA_STATUS_LINK_SPEED 0x0f +#define USB_IRDA_LS_NO_CHANGE 0 +#define USB_IRDA_LS_2400 1 +#define USB_IRDA_LS_9600 2 +#define USB_IRDA_LS_19200 3 +#define USB_IRDA_LS_38400 4 +#define USB_IRDA_LS_57600 5 +#define USB_IRDA_LS_115200 6 +#define USB_IRDA_LS_576000 7 +#define USB_IRDA_LS_1152000 8 +#define USB_IRDA_LS_4000000 9 + /* The following is a 4-bit value used only for * outbound header: * diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index a1be64c9940f..22c1f579afe3 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -69,4 +69,7 @@ /* Hub needs extra delay after resetting its port. 
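The skb_list_walk_safe() iterator added above is safe against unlinking the fragment currently being visited; a driver-side sketch (the function name is illustrative) that walks and frees a GSO segment list:

    /* Sketch: drop every segment produced by skb_gso_segment(). */
    #include <linux/skbuff.h>

    static void example_drop_segs(struct sk_buff *segs)
    {
            struct sk_buff *skb, *next;

            skb_list_walk_safe(segs, skb, next) {
                    skb_mark_not_on_list(skb);
                    kfree_skb(skb);
            }
    }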
*/ #define USB_QUIRK_HUB_SLOW_RESET BIT(14) +/* device has blacklisted endpoints */ +#define USB_QUIRK_ENDPOINT_BLACKLIST BIT(15) + #endif /* __LINUX_USB_QUIRKS_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a4b241102771..0e7cd3aa489b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -139,8 +139,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index 5f36e0d2ede6..95353ae476a1 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -371,7 +371,7 @@ static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev) struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) @@ -403,7 +403,7 @@ static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev) ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ }) @@ -431,8 +431,8 @@ static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev) struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ } while (0) /** @@ -462,8 +462,8 @@ static inline bool v4l2_device_supports_requests(struct v4l2_device *v4l2_dev) ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ }) diff --git a/include/media/v4l2-rect.h b/include/media/v4l2-rect.h index c86474dc7b55..8800a640c224 100644 --- a/include/media/v4l2-rect.h +++ b/include/media/v4l2-rect.h @@ -63,10 +63,10 @@ static inline void v4l2_rect_map_inside(struct v4l2_rect *r, r->left = boundary->left; if (r->top < boundary->top) r->top = boundary->top; - if (r->left + r->width > boundary->width) - r->left = boundary->width - r->width; - if (r->top + r->height > boundary->height) - r->top = boundary->height - r->height; + if (r->left + r->width > boundary->left + boundary->width) + r->left = boundary->left + boundary->width - r->width; + if (r->top + r->height > boundary->top + boundary->height) + r->top = boundary->top + boundary->height - r->height; } /** diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 54e227e6b06a..a259050f84af 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -108,6 +108,7 @@ struct fib_rule_notifier_info { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d93017a7ce5c..e03827f702f3 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -5,6 +5,7 @@ 
#include <linux/types.h> #include <linux/in6.h> #include <linux/siphash.h> +#include <linux/string.h> #include <uapi/linux/if_ether.h> struct sk_buff; @@ -349,4 +350,12 @@ struct bpf_flow_dissector { void *data_end; }; +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index 5d4bfdba9adf..9ac2d2672a93 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); } +#if IS_ENABLED(CONFIG_NF_NAT) +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); +#else +#define icmp_ndo_send icmp_send +#endif + int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/include/net/ipx.h b/include/net/ipx.h index baf090390998..9d1342807b59 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -47,11 +47,6 @@ struct ipxhdr { /* From af_ipx.c */ extern int sysctl_ipx_pprop_broadcasting; -static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb) -{ - return (struct ipxhdr *)skb_transport_header(skb); -} - struct ipx_interface { /* IPX address */ __be32 if_netnum; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index aa145808e57a..77e6b5a83b06 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1004,12 +1004,11 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate) struct ieee80211_tx_info { /* common information */ u32 flags; - u8 band; - - u8 hw_queue; - - u16 ack_frame_id:6; - u16 tx_time_est:10; + u32 band:3, + ack_frame_id:13, + hw_queue:4, + tx_time_est:10; + /* 2 free bits */ union { struct { diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e553fc80eb23..9976ad2f54fd 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -141,31 +141,38 @@ __cls_set_class(unsigned long *clp, unsigned long cl) return xchg(clp, cl); } -static inline unsigned long -cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl) +static inline void +__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { - unsigned long old_cl; + unsigned long cl; - sch_tree_lock(q); - old_cl = __cls_set_class(clp, cl); - sch_tree_unlock(q); - return old_cl; + cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); + cl = __cls_set_class(&r->class, cl); + if (cl) + q->ops->cl_ops->unbind_tcf(q, cl); } static inline void tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base) { struct Qdisc *q = tp->chain->block->q; - unsigned long cl; /* Check q as it is not set for shared blocks. In that case, * setting class is not supported.
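(A usage sketch for the icmp_ndo_send() helper declared above. The tunnel driver and the MTU value here are illustrative assumptions, not from the patch; the point is that the CONFIG_NF_NAT variant reverses any conntrack translation on the skb before building the error, which plain icmp_send() would get wrong for NATed flows.)

/* Hypothetical tunnel transmit path: the frame is too big for the tunnel,
 * so report the MTU back to the original sender, NAT-aware. */
static netdev_tx_t example_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	unsigned int mtu = 1420;	/* illustrative tunnel MTU */

	if (skb->len > mtu) {
		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
	/* ... otherwise encapsulate and transmit ... */
	return NETDEV_TX_OK;
}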
*/ if (!q) return; - cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); - cl = cls_set_class(q, &r->class, cl); - if (cl) + sch_tree_lock(q); + __tcf_bind_filter(q, r, base); + sch_tree_unlock(q); +} + +static inline void +__tcf_unbind_filter(struct Qdisc *q, struct tcf_result *r) +{ + unsigned long cl; + + if ((cl = __cls_set_class(&r->class, 0)) != 0) q->ops->cl_ops->unbind_tcf(q, cl); } @@ -173,12 +180,10 @@ static inline void tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) { struct Qdisc *q = tp->chain->block->q; - unsigned long cl; if (!q) return; - if ((cl = __cls_set_class(&r->class, 0)) != 0) - q->ops->cl_ops->unbind_tcf(q, cl); + __tcf_unbind_filter(q, r); } struct tcf_exts { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index fceddf89592a..151208704ed2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -318,7 +318,8 @@ struct tcf_proto_ops { void *type_data); void (*hw_del)(struct tcf_proto *tp, void *type_data); - void (*bind_class)(void *, u32, unsigned long); + void (*bind_class)(void *, u32, unsigned long, + void *, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, struct nlattr **tca, diff --git a/include/net/udp.h b/include/net/udp.h index bad74f780831..8f163d674f07 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -476,6 +476,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + if (skb->pkt_type == PACKET_LOOPBACK) + skb->ip_summed = CHECKSUM_PARTIAL; + /* the GSO CB lays after the UDP one, no need to save and restore any * CB fragment */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5608e14e3aad..a14f837fb1c8 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2149,7 +2149,6 @@ struct ib_port_cache { struct ib_cache { rwlock_t lock; - struct ib_event_handler event_handler; }; struct ib_port_immutable { @@ -2627,7 +2626,11 @@ struct ib_device { struct rcu_head rcu_head; struct list_head event_handler_list; - spinlock_t event_handler_lock; + /* Protects event_handler_list */ + struct rw_semaphore event_handler_rwsem; + + /* Protects QP's event_handler calls and open_qp list */ + spinlock_t event_handler_lock; struct rw_semaphore client_data_rwsem; struct xarray client_data; @@ -2942,7 +2945,7 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, void ib_register_event_handler(struct ib_event_handler *event_handler); void ib_unregister_event_handler(struct ib_event_handler *event_handler); -void ib_dispatch_event(struct ib_event *event); +void ib_dispatch_event(const struct ib_event *event); int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); @@ -4300,6 +4303,9 @@ static inline int ib_check_mr_access(int flags) !(flags & IB_ACCESS_LOCAL_WRITE)) return -EINVAL; + if (flags & ~IB_ACCESS_SUPPORTED) + return -EINVAL; + return 0; } diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index 533f56733ba8..b71b5c4f418c 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -627,7 +627,6 @@ struct iscsi_reject { #define ISCSI_REASON_BOOKMARK_INVALID 9 #define ISCSI_REASON_BOOKMARK_NO_RESOURCES 10 #define ISCSI_REASON_NEGOTIATION_RESET 11 -#define ISCSI_REASON_WAITING_FOR_LOGOUT 12 /* Max. 
number of Key=Value pairs in a text message */ #define MAX_KEY_VALUE_PAIRS 8192 diff --git a/include/scsi/sg.h b/include/scsi/sg.h index f91bcca604e4..29c7ad04d2e2 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -68,6 +68,36 @@ typedef struct sg_io_hdr unsigned int info; /* [o] auxiliary information */ } sg_io_hdr_t; /* 64 bytes long (on i386) */ +#if defined(__KERNEL__) +#include <linux/compat.h> + +struct compat_sg_io_hdr { + compat_int_t interface_id; /* [i] 'S' for SCSI generic (required) */ + compat_int_t dxfer_direction; /* [i] data transfer direction */ + unsigned char cmd_len; /* [i] SCSI command length ( <= 16 bytes) */ + unsigned char mx_sb_len; /* [i] max length to write to sbp */ + unsigned short iovec_count; /* [i] 0 implies no scatter gather */ + compat_uint_t dxfer_len; /* [i] byte count of data transfer */ + compat_uint_t dxferp; /* [i], [*io] points to data transfer memory + or scatter gather list */ + compat_uptr_t cmdp; /* [i], [*i] points to command to perform */ + compat_uptr_t sbp; /* [i], [*o] points to sense_buffer memory */ + compat_uint_t timeout; /* [i] MAX_UINT->no timeout (unit: millisec) */ + compat_uint_t flags; /* [i] 0 -> default, see SG_FLAG... */ + compat_int_t pack_id; /* [i->o] unused internally (normally) */ + compat_uptr_t usr_ptr; /* [i->o] unused internally */ + unsigned char status; /* [o] scsi status */ + unsigned char masked_status; /* [o] shifted, masked scsi status */ + unsigned char msg_status; /* [o] messaging level data (optional) */ + unsigned char sb_len_wr; /* [o] byte count actually written to sbp */ + unsigned short host_status; /* [o] errors from host adapter */ + unsigned short driver_status; /* [o] errors from software driver */ + compat_int_t resid; /* [o] dxfer_len - actual_transferred */ + compat_uint_t duration; /* [o] time taken by cmd (unit: millisec) */ + compat_uint_t info; /* [o] auxiliary information */ +}; +#endif + #define SG_INTERFACE_ID_ORIG 'S' /* Use negative values to flag difference from original sg_header structure */ diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h index e05b95e83d5a..fb9dce4c6928 100644 --- a/include/sound/hdaudio.h +++ b/include/sound/hdaudio.h @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/interrupt.h> +#include <linux/io.h> #include <linux/pm_runtime.h> #include <linux/timecounter.h> #include <sound/core.h> @@ -330,6 +331,7 @@ struct hdac_bus { bool chip_init:1; /* h/w initialized */ /* behavior flags */ + bool aligned_mmio:1; /* aligned MMIO access */ bool sync_write:1; /* sync after verb write */ bool use_posbuf:1; /* use position buffer */ bool snoop:1; /* enable snooping */ @@ -405,34 +407,61 @@ void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus); unsigned int snd_hdac_aligned_read(void __iomem *addr, unsigned int mask); void snd_hdac_aligned_write(unsigned int val, void __iomem *addr, unsigned int mask); -#define snd_hdac_reg_writeb(v, addr) snd_hdac_aligned_write(v, addr, 0xff) -#define snd_hdac_reg_writew(v, addr) snd_hdac_aligned_write(v, addr, 0xffff) -#define snd_hdac_reg_readb(addr) snd_hdac_aligned_read(addr, 0xff) -#define snd_hdac_reg_readw(addr) snd_hdac_aligned_read(addr, 0xffff) -#else /* CONFIG_SND_HDA_ALIGNED_MMIO */ -#define snd_hdac_reg_writeb(val, addr) writeb(val, addr) -#define snd_hdac_reg_writew(val, addr) writew(val, addr) -#define snd_hdac_reg_readb(addr) readb(addr) -#define snd_hdac_reg_readw(addr) readw(addr) -#endif /* CONFIG_SND_HDA_ALIGNED_MMIO */ -#define snd_hdac_reg_writel(val, addr) writel(val, addr) -#define snd_hdac_reg_readl(addr) readl(addr) +#define snd_hdac_aligned_mmio(bus) (bus)->aligned_mmio +#else +#define
snd_hdac_aligned_mmio(bus) false +#define snd_hdac_aligned_read(addr, mask) 0 +#define snd_hdac_aligned_write(val, addr, mask) do {} while (0) +#endif + +static inline void snd_hdac_reg_writeb(struct hdac_bus *bus, void __iomem *addr, + u8 val) +{ + if (snd_hdac_aligned_mmio(bus)) + snd_hdac_aligned_write(val, addr, 0xff); + else + writeb(val, addr); +} + +static inline void snd_hdac_reg_writew(struct hdac_bus *bus, void __iomem *addr, + u16 val) +{ + if (snd_hdac_aligned_mmio(bus)) + snd_hdac_aligned_write(val, addr, 0xffff); + else + writew(val, addr); +} + +static inline u8 snd_hdac_reg_readb(struct hdac_bus *bus, void __iomem *addr) +{ + return snd_hdac_aligned_mmio(bus) ? + snd_hdac_aligned_read(addr, 0xff) : readb(addr); +} + +static inline u16 snd_hdac_reg_readw(struct hdac_bus *bus, void __iomem *addr) +{ + return snd_hdac_aligned_mmio(bus) ? + snd_hdac_aligned_read(addr, 0xffff) : readw(addr); +} + +#define snd_hdac_reg_writel(bus, addr, val) writel(val, addr) +#define snd_hdac_reg_readl(bus, addr) readl(addr) /* * macros for easy use */ #define _snd_hdac_chip_writeb(chip, reg, value) \ - snd_hdac_reg_writeb(value, (chip)->remap_addr + (reg)) + snd_hdac_reg_writeb(chip, (chip)->remap_addr + (reg), value) #define _snd_hdac_chip_readb(chip, reg) \ - snd_hdac_reg_readb((chip)->remap_addr + (reg)) + snd_hdac_reg_readb(chip, (chip)->remap_addr + (reg)) #define _snd_hdac_chip_writew(chip, reg, value) \ - snd_hdac_reg_writew(value, (chip)->remap_addr + (reg)) + snd_hdac_reg_writew(chip, (chip)->remap_addr + (reg), value) #define _snd_hdac_chip_readw(chip, reg) \ - snd_hdac_reg_readw((chip)->remap_addr + (reg)) + snd_hdac_reg_readw(chip, (chip)->remap_addr + (reg)) #define _snd_hdac_chip_writel(chip, reg, value) \ - snd_hdac_reg_writel(value, (chip)->remap_addr + (reg)) + snd_hdac_reg_writel(chip, (chip)->remap_addr + (reg), value) #define _snd_hdac_chip_readl(chip, reg) \ - snd_hdac_reg_readl((chip)->remap_addr + (reg)) + snd_hdac_reg_readl(chip, (chip)->remap_addr + (reg)) /* read/write a register, pass without AZX_REG_ prefix */ #define snd_hdac_chip_writel(chip, reg, value) \ @@ -540,17 +569,17 @@ int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus, */ /* read/write a register, pass without AZX_REG_ prefix */ #define snd_hdac_stream_writel(dev, reg, value) \ - snd_hdac_reg_writel(value, (dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_writel((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) #define snd_hdac_stream_writew(dev, reg, value) \ - snd_hdac_reg_writew(value, (dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_writew((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) #define snd_hdac_stream_writeb(dev, reg, value) \ - snd_hdac_reg_writeb(value, (dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_writeb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg, value) #define snd_hdac_stream_readl(dev, reg) \ - snd_hdac_reg_readl((dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_readl((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) #define snd_hdac_stream_readw(dev, reg) \ - snd_hdac_reg_readw((dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_readw((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) #define snd_hdac_stream_readb(dev, reg) \ - snd_hdac_reg_readb((dev)->sd_addr + AZX_REG_ ## reg) + snd_hdac_reg_readb((dev)->bus, (dev)->sd_addr + AZX_REG_ ## reg) /* update a register, pass without AZX_REG_ prefix */ #define snd_hdac_stream_updatel(dev, reg, mask, val) \ diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 40ab20439fee..a36b7227a15a 100644 --- 
a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -77,9 +77,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 66122602bd08..697e2c0624dc 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -449,7 +449,7 @@ TRACE_EVENT_RCU(rcu_fqs, */ TRACE_EVENT_RCU(rcu_dyntick, - TP_PROTO(const char *polarity, long oldnesting, long newnesting, atomic_t dynticks), + TP_PROTO(const char *polarity, long oldnesting, long newnesting, int dynticks), TP_ARGS(polarity, oldnesting, newnesting, dynticks), @@ -464,7 +464,7 @@ TRACE_EVENT_RCU(rcu_dyntick, __entry->polarity = polarity; __entry->oldnesting = oldnesting; __entry->newnesting = newnesting; - __entry->dynticks = atomic_read(&dynticks); + __entry->dynticks = dynticks; ), TP_printk("%s %lx %lx %#3x", __entry->polarity, diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index ef50be4e5e6c..d94def25e4dc 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -67,8 +67,8 @@ DECLARE_EVENT_CLASS(writeback_page_template, TP_fast_assign( strscpy_pad(__entry->name, - mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", - 32); + bdi_dev_name(mapping ? inode_to_bdi(mapping->host) : + NULL), 32); __entry->ino = mapping ? mapping->host->i_ino : 0; __entry->index = page->index; ), @@ -111,8 +111,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, struct backing_dev_info *bdi = inode_to_bdi(inode); /* may be called for files on pseudo FSes w/ unregistered bdi */ - strscpy_pad(__entry->name, - bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->flags = flags; @@ -193,7 +192,7 @@ TRACE_EVENT(inode_foreign_history, ), TP_fast_assign( - strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32); + strncpy(__entry->name, bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); __entry->history = history; @@ -222,7 +221,7 @@ TRACE_EVENT(inode_switch_wbs, ), TP_fast_assign( - strncpy(__entry->name, dev_name(old_wb->bdi->dev), 32); + strncpy(__entry->name, bdi_dev_name(old_wb->bdi), 32); __entry->ino = inode->i_ino; __entry->old_cgroup_ino = __trace_wb_assign_cgroup(old_wb); __entry->new_cgroup_ino = __trace_wb_assign_cgroup(new_wb); @@ -255,7 +254,7 @@ TRACE_EVENT(track_foreign_dirty, struct address_space *mapping = page_mapping(page); struct inode *inode = mapping ? mapping->host : NULL; - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->bdi_id = wb->bdi->id; __entry->ino = inode ? 
inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; @@ -288,7 +287,7 @@ TRACE_EVENT(flush_foreign, ), TP_fast_assign( - strncpy(__entry->name, dev_name(wb->bdi->dev), 32); + strncpy(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); __entry->frn_bdi_id = frn_bdi_id; __entry->frn_memcg_id = frn_memcg_id; @@ -318,7 +317,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); @@ -361,9 +360,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(ino_t, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, - wb->bdi->dev ? dev_name(wb->bdi->dev) : - "(unknown)", 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->nr_pages = work->nr_pages; __entry->sb_dev = work->sb ? work->sb->s_dev : 0; __entry->sync_mode = work->sync_mode; @@ -416,7 +413,7 @@ DECLARE_EVENT_CLASS(writeback_class, __field(ino_t, cgroup_ino) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: cgroup_ino=%lu", @@ -438,7 +435,7 @@ TRACE_EVENT(writeback_bdi_register, __array(char, name, 32) ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); ), TP_printk("bdi %s", __entry->name @@ -463,7 +460,7 @@ DECLARE_EVENT_CLASS(wbc_class, ), TP_fast_assign( - strscpy_pad(__entry->name, dev_name(bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(bdi), 32); __entry->nr_to_write = wbc->nr_to_write; __entry->pages_skipped = wbc->pages_skipped; __entry->sync_mode = wbc->sync_mode; @@ -514,7 +511,7 @@ TRACE_EVENT(writeback_queue_io, ), TP_fast_assign( unsigned long *older_than_this = work->older_than_this; - strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); __entry->older = older_than_this ? *older_than_this : 0; __entry->age = older_than_this ? 
(jiffies - *older_than_this) * 1000 / HZ : -1; @@ -600,7 +597,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, ), TP_fast_assign( - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->write_bw = KBps(wb->write_bandwidth); __entry->avg_write_bw = KBps(wb->avg_write_bandwidth); __entry->dirty_rate = KBps(dirty_rate); @@ -665,7 +662,7 @@ TRACE_EVENT(balance_dirty_pages, TP_fast_assign( unsigned long freerun = (thresh + bg_thresh) / 2; - strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32); + strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32); __entry->limit = global_wb_domain.dirty_limit; __entry->setpoint = (global_wb_domain.dirty_limit + @@ -726,7 +723,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; @@ -800,7 +797,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, TP_fast_assign( strscpy_pad(__entry->name, - dev_name(inode_to_bdi(inode)->dev), 32); + bdi_dev_name(inode_to_bdi(inode)), 32); __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index a89eb0accd5e..a3e760886b8e 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -21,6 +21,7 @@ #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 +#define FUTEX_WAIT_MULTIPLE 31 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -40,6 +41,8 @@ FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_MULTIPLE_PRIVATE (FUTEX_WAIT_MULTIPLE | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at @@ -150,4 +153,21 @@ struct robust_list_head { (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) +/* + * Maximum number of multiple futexes to wait for + */ +#define FUTEX_MULTIPLE_MAX_COUNT 128 + +/** + * struct futex_wait_block - Block of futexes to be waited for + * @uaddr: User address of the futex + * @val: Futex value expected by userspace + * @bitset: Bitset for the optional bitmasked wakeup + */ +struct futex_wait_block { + __u32 __user *uaddr; + __u32 val; + __u32 bitset; +}; + #endif /* _UAPI_LINUX_FUTEX_H */ diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 55cfcb71606d..88693fed2c4b 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -175,6 +175,7 @@ struct io_uring_params { #define IORING_REGISTER_EVENTFD 4 #define IORING_UNREGISTER_EVENTFD 5 #define IORING_REGISTER_FILES_UPDATE 6 +#define IORING_REGISTER_EVENTFD_ASYNC 7 struct io_uring_files_update { __u32 offset; diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed..ad22079125bf 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + 
USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h new file mode 100644 index 000000000000..ae88be14c947 --- /dev/null +++ b/include/uapi/linux/wireguard.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + * + * Documentation + * ============= + * + * The below enums and macros are for interfacing with WireGuard, using generic + * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two + * methods: get and set. Note that while they share many common attributes, + * these two functions actually accept a slightly different set of inputs and + * outputs. + * + * WG_CMD_GET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain + * one but not both of: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * + * The kernel will then return several messages (NLM_F_MULTI) containing the + * following tree of nested items: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGDEVICE_A_LISTEN_PORT: NLA_U16 + * WGDEVICE_A_FWMARK: NLA_U32 + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN + * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16 + * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec + * WGPEER_A_RX_BYTES: NLA_U64 + * WGPEER_A_TX_BYTES: NLA_U64 + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32 + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that all of the allowed IPs of a single peer will not + * fit within a single netlink message. In that case, the same peer will + * be written in the following message, except it will only contain + * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several + * times in a row for the same peer. It is then up to the receiver to + * coalesce adjacent peers. Likewise, it is possible that all peers will + * not fit within a single message. So, subsequent peers will be sent + * in following messages, except those will only contain WGDEVICE_A_IFNAME + * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these + * messages to form the complete list of peers. + * + * Since this is an NLM_F_DUMP command, the final message will always be + * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message + * contains an integer error code. It is either zero or a negative error + * code corresponding to the errno. + * + * WG_CMD_SET_DEVICE + * ----------------- + * + * May only be called via NLM_F_REQUEST.
The command should contain the + * following tree of nested items, containing one but not both of + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME: + * + * WGDEVICE_A_IFINDEX: NLA_U32 + * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current + * peers should be removed prior to adding the list below. + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove + * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly + * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable + * WGDEVICE_A_PEERS: NLA_NESTED + * 0: NLA_NESTED + * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN + * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the + * specified peer should not exist at the end of the + * operation, rather than added/updated and/or + * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed + * IPs of this peer should be removed prior to adding + * the list below and/or WGPEER_F_UPDATE_ONLY if the + * peer should only be set if it already exists. + * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove + * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6 + * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable + * WGPEER_A_ALLOWEDIPS: NLA_NESTED + * 0: NLA_NESTED + * WGALLOWEDIP_A_FAMILY: NLA_U16 + * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr + * WGALLOWEDIP_A_CIDR_MASK: NLA_U8 + * 0: NLA_NESTED + * ... + * 0: NLA_NESTED + * ... + * ... + * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at + * all by most users of this API, as the + * most recent protocol will be used when + * this is unset. Otherwise, must be set + * to 1. + * 0: NLA_NESTED + * ... + * ... + * + * It is possible that the amount of configuration data exceeds that of + * the maximum message length accepted by the kernel. In that case, several + * messages should be sent one after another, with each successive one + * filling in information not contained in the prior. Note that if + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably + * should not be specified in fragments that come after, so that the list + * of peers is only cleared the first time but appended after. Likewise for + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message + * of a peer, it likely should not be specified in subsequent fragments. + * + * If an error occurs, NLMSG_ERROR will reply containing an errno. 
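(To make the GET flow documented above concrete, a minimal userspace sketch using libnl-3's generic netlink API; using libnl is an assumption of this example, since the in-tree tooling rolls its own netlink code. Error handling and reply parsing are abbreviated.)

#include <errno.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/wireguard.h>

/* Request a WG_CMD_GET_DEVICE dump; peers arrive as NLM_F_MULTI parts. */
int dump_wg_device(const char *ifname)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err;

	if (!sk)
		return -ENOMEM;
	err = genl_connect(sk);
	if (err < 0)
		goto out;
	family = genl_ctrl_resolve(sk, WG_GENL_NAME);
	if (family < 0) {
		err = family;
		goto out;
	}
	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
		    NLM_F_REQUEST | NLM_F_DUMP, WG_CMD_GET_DEVICE,
		    WG_GENL_VERSION);
	nla_put_string(msg, WGDEVICE_A_IFNAME, ifname);
	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	if (err >= 0)
		err = nl_recvmsgs_default(sk);	/* caller installs a parse cb */
out:
	nl_socket_free(sk);
	return err;
}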
+ */ + +#ifndef _WG_UAPI_WIREGUARD_H +#define _WG_UAPI_WIREGUARD_H + +#define WG_GENL_NAME "wireguard" +#define WG_GENL_VERSION 1 + +#define WG_KEY_LEN 32 + +enum wg_cmd { + WG_CMD_GET_DEVICE, + WG_CMD_SET_DEVICE, + __WG_CMD_MAX +}; +#define WG_CMD_MAX (__WG_CMD_MAX - 1) + +enum wgdevice_flag { + WGDEVICE_F_REPLACE_PEERS = 1U << 0, + __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS +}; +enum wgdevice_attribute { + WGDEVICE_A_UNSPEC, + WGDEVICE_A_IFINDEX, + WGDEVICE_A_IFNAME, + WGDEVICE_A_PRIVATE_KEY, + WGDEVICE_A_PUBLIC_KEY, + WGDEVICE_A_FLAGS, + WGDEVICE_A_LISTEN_PORT, + WGDEVICE_A_FWMARK, + WGDEVICE_A_PEERS, + __WGDEVICE_A_LAST +}; +#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1) + +enum wgpeer_flag { + WGPEER_F_REMOVE_ME = 1U << 0, + WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1, + WGPEER_F_UPDATE_ONLY = 1U << 2, + __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS | + WGPEER_F_UPDATE_ONLY +}; +enum wgpeer_attribute { + WGPEER_A_UNSPEC, + WGPEER_A_PUBLIC_KEY, + WGPEER_A_PRESHARED_KEY, + WGPEER_A_FLAGS, + WGPEER_A_ENDPOINT, + WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, + WGPEER_A_LAST_HANDSHAKE_TIME, + WGPEER_A_RX_BYTES, + WGPEER_A_TX_BYTES, + WGPEER_A_ALLOWEDIPS, + WGPEER_A_PROTOCOL_VERSION, + __WGPEER_A_LAST +}; +#define WGPEER_A_MAX (__WGPEER_A_LAST - 1) + +enum wgallowedip_attribute { + WGALLOWEDIP_A_UNSPEC, + WGALLOWEDIP_A_FAMILY, + WGALLOWEDIP_A_IPADDR, + WGALLOWEDIP_A_CIDR_MASK, + __WGALLOWEDIP_A_LAST +}; +#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1) + +#endif /* _WG_UAPI_WIREGUARD_H */ diff --git a/init/Kconfig b/init/Kconfig index a34064a031a5..292b5d4f761e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -54,6 +54,7 @@ config CC_DISABLE_WARN_MAYBE_UNINITIALIZED config CONSTRUCTORS bool + depends on !UML config IRQ_WORK bool @@ -697,9 +698,20 @@ config GENERIC_SCHED_CLOCK menu "Scheduler features" +config SCHED_BMQ + bool "BMQ CPU scheduler" + help + The BitMap Queue CPU scheduler for excellent interactivity and + responsiveness on the desktop and solid scalability on normal + hardware and commodity servers. + + Say Y here. + default y + config UCLAMP_TASK bool "Enable utilization clamping for RT/FAIR tasks" depends on CPU_FREQ_GOV_SCHEDUTIL + depends on !SCHED_BMQ help This feature enables the scheduler to track the clamped utilization of each CPU based on RUNNABLE tasks scheduled on that CPU. @@ -766,8 +778,7 @@ config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH bool config CC_HAS_INT128 - def_bool y - depends on !$(cc-option,-D__SIZEOF_INT128__=0) + def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT # # For architectures that know their GCC __int128 support is sound @@ -786,6 +797,7 @@ config NUMA_BALANCING depends on ARCH_SUPPORTS_NUMA_BALANCING depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY depends on SMP && NUMA && MIGRATION + depends on !SCHED_BMQ help This option adds support for automatic NUMA aware memory/task placement. The mechanism is quite primitive and is based on migrating memory when @@ -887,7 +899,7 @@ menuconfig CGROUP_SCHED bandwidth allocation to such task groups. It uses cgroups to group tasks. -if CGROUP_SCHED +if CGROUP_SCHED && !SCHED_BMQ config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED @@ -1102,6 +1114,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. 
Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y @@ -1134,6 +1162,7 @@ config CHECKPOINT_RESTORE config SCHED_AUTOGROUP bool "Automatic process group scheduling" + depends on !SCHED_BMQ select CGROUPS select CGROUP_SCHED select FAIR_GROUP_SCHED @@ -1228,7 +1257,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE_O3 bool "Optimize more for performance (-O3)" - depends on ARC imply CC_DISABLE_WARN_MAYBE_UNINITIALIZED # avoid false positives help Choosing this option will pass "-O3" to your compiler to optimize diff --git a/init/init_task.c b/init/init_task.c index 9e5cbe5eab7b..c293de91d90f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -66,9 +66,15 @@ struct task_struct init_task .stack = init_stack, .usage = REFCOUNT_INIT(2), .flags = PF_KTHREAD, +#ifdef CONFIG_SCHED_BMQ + .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, + .static_prio = DEFAULT_PRIO, + .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, +#else .prio = MAX_PRIO - 20, .static_prio = MAX_PRIO - 20, .normal_prio = MAX_PRIO - 20, +#endif .policy = SCHED_NORMAL, .cpus_ptr = &init_task.cpus_mask, .cpus_mask = CPU_MASK_ALL, @@ -78,6 +84,12 @@ struct task_struct init_task .restart_block = { .fn = do_no_restart_syscall, }, +#ifdef CONFIG_SCHED_BMQ + .boost_prio = 0, + .bmq_idx = 15, + .bmq_node = LIST_HEAD_INIT(init_task.bmq_node), + .time_slice = HZ, +#else .se = { .group_node = LIST_HEAD_INIT(init_task.se.group_node), }, @@ -85,6 +97,7 @@ struct task_struct init_task .run_list = LIST_HEAD_INIT(init_task.rt.run_list), .time_slice = RR_TIMESLICE, }, +#endif .tasks = LIST_HEAD_INIT(init_task.tasks), #ifdef CONFIG_SMP .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), diff --git a/ipc/msg.c b/ipc/msg.c index 8dec945fa030..767587ab45a3 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -377,7 +377,7 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version) * NOTE: no locks must be held, the rwsem is taken inside this function. 
*/ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, - struct msqid64_ds *msqid64) + struct ipc64_perm *perm, int msg_qbytes) { struct kern_ipc_perm *ipcp; struct msg_queue *msq; @@ -387,7 +387,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, rcu_read_lock(); ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd, - &msqid64->msg_perm, msqid64->msg_qbytes); + perm, msg_qbytes); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_unlock1; @@ -409,18 +409,18 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, { DEFINE_WAKE_Q(wake_q); - if (msqid64->msg_qbytes > ns->msg_ctlmnb && + if (msg_qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE)) { err = -EPERM; goto out_unlock1; } ipc_lock_object(&msq->q_perm); - err = ipc_update_perm(&msqid64->msg_perm, ipcp); + err = ipc_update_perm(perm, ipcp); if (err) goto out_unlock0; - msq->q_qbytes = msqid64->msg_qbytes; + msq->q_qbytes = msg_qbytes; msq->q_ctime = ktime_get_real_seconds(); /* @@ -601,9 +601,10 @@ static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int ver case IPC_SET: if (copy_msqid_from_user(&msqid64, buf, version)) return -EFAULT; - /* fallthru */ + return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, + msqid64.msg_qbytes); case IPC_RMID: - return msgctl_down(ns, msqid, cmd, &msqid64); + return msgctl_down(ns, msqid, cmd, NULL, 0); default: return -EINVAL; } @@ -735,9 +736,9 @@ static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int versio case IPC_SET: if (copy_compat_msqid_from_user(&msqid64, uptr, version)) return -EFAULT; - /* fallthru */ + return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes); case IPC_RMID: - return msgctl_down(ns, msqid, cmd, &msqid64); + return msgctl_down(ns, msqid, cmd, NULL, 0); default: return -EINVAL; } diff --git a/ipc/sem.c b/ipc/sem.c index ec97a7072413..fe12ea8dd2b3 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2368,11 +2368,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock - * isn't required. 
Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { diff --git a/kernel/audit.c b/kernel/audit.c index 8e09f0f55b4b..f971cd636426 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1100,13 +1100,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature audit_log_end(ab); } -static int audit_set_feature(struct sk_buff *skb) +static int audit_set_feature(struct audit_features *uaf) { - struct audit_features *uaf; int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); - uaf = nlmsg_data(nlmsg_hdr(skb)); /* if there is ever a version 2 we should handle that here */ @@ -1174,6 +1172,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; void *data; + int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; @@ -1187,6 +1186,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); + data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { @@ -1210,7 +1210,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) @@ -1314,7 +1314,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; break; case AUDIT_SET_FEATURE: - err = audit_set_feature(skb); + if (data_len < sizeof(struct audit_features)) + return -EINVAL; + err = audit_set_feature(data); if (err) return err; break; @@ -1326,6 +1328,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ + char *str = data; + err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); @@ -1333,26 +1337,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } audit_log_user_recv_msg(&ab, msg_type); - if (msg_type != AUDIT_USER_TTY) + if (msg_type != AUDIT_USER_TTY) { + /* ensure NULL termination */ + str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, - (char *)data); - else { - int size; - + str); + } else { audit_log_format(ab, " data="); - size = nlmsg_len(nlh); - if (size > 0 && - ((unsigned char *)data)[size - 1] == '\0') - size--; - audit_log_n_untrustedstring(ab, data, size); + if (data_len > 0 && str[data_len - 1] == '\0') + data_len--; + audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: - if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) + if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(audit_context(), &ab, @@ -1364,7 +1366,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1379,7 +1381,7 @@ static int audit_receive_msg(struct sk_buff *skb, 
struct nlmsghdr *nlh) case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; - size_t msglen = nlmsg_len(nlh); + size_t msglen = data_len; char *old, *new; err = -EINVAL; @@ -1455,7 +1457,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b0126e9c0743..026e34da4ace 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -456,6 +456,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, bufp = data->buf; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 f_val; err = -EINVAL; @@ -464,12 +465,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; f->type = data->fields[i]; - f->val = data->values[i]; + f_val = data->values[i]; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { + if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; - f->val = 0; + f_val = 0; entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } @@ -485,7 +486,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SUID: case AUDIT_FSUID: case AUDIT_OBJ_UID: - f->uid = make_kuid(current_user_ns(), f->val); + f->uid = make_kuid(current_user_ns(), f_val); if (!uid_valid(f->uid)) goto exit_free; break; @@ -494,11 +495,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SGID: case AUDIT_FSGID: case AUDIT_OBJ_GID: - f->gid = make_kgid(current_user_ns(), f->val); + f->gid = make_kgid(current_user_ns(), f_val); if (!gid_valid(f->gid)) goto exit_free; break; case AUDIT_ARCH: + f->val = f_val; entry->rule.arch_f = f; break; case AUDIT_SUBJ_USER: @@ -511,11 +513,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } + entry->rule.buflen += f_val; + f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they @@ -524,68 +528,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, pr_warn("audit rule for LSM \'%s\' is invalid\n", str); err = 0; - } - if (err) { - kfree(str); + } else if (err) goto exit_free; - } else - f->lsm_str = str; break; case AUDIT_WATCH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - - err = audit_to_watch(&entry->rule, str, f->val, f->op); + } + err = audit_to_watch(&entry->rule, str, f_val, f->op); if (err) { kfree(str); goto exit_free; } + entry->rule.buflen += f_val; break; case AUDIT_DIR: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) 
{ + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } err = audit_make_tree(&entry->rule, str, f->op); kfree(str); if (err) goto exit_free; + entry->rule.buflen += f_val; break; case AUDIT_INODE: + f->val = f_val; err = audit_to_inode(&entry->rule, f); if (err) goto exit_free; break; case AUDIT_FILTERKEY: - if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) + if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; + } + entry->rule.buflen += f_val; entry->rule.filterkey = str; break; case AUDIT_EXE: - if (entry->rule.exe || f->val > PATH_MAX) + if (entry->rule.exe || f_val > PATH_MAX) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); + str = audit_unpack_string(&bufp, &remain, f_val); if (IS_ERR(str)) { err = PTR_ERR(str); goto exit_free; } - entry->rule.buflen += f->val; - - audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + audit_mark = audit_alloc_mark(&entry->rule, str, f_val); if (IS_ERR(audit_mark)) { kfree(str); err = PTR_ERR(audit_mark); goto exit_free; } + entry->rule.buflen += f_val; entry->rule.exe = audit_mark; break; + default: + f->val = f_val; + break; } } diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 3d3d61b5985b..b4b6b77f309c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -293,7 +293,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) struct hlist_head *head = dev_map_index_hash(dtab, key); struct bpf_dtab_netdev *dev; - hlist_for_each_entry_rcu(dev, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist, + lockdep_is_held(&dtab->index_lock)) if (dev->idx == key) return dev; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index ecf42bec38c0..6f22e0e74ef2 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -196,6 +196,7 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) void *key = map_iter(m)->key; void *prev_key; + (*pos)++; if (map_iter(m)->done) return NULL; @@ -208,8 +209,6 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) map_iter(m)->done = true; return NULL; } - - ++(*pos); return key; } diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 5b9da0954a27..3668a0bc18ec 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info, ulen = info->jited_prog_len; info->jited_prog_len = aux->offload->jited_len; - if (info->jited_prog_len & ulen) { + if (info->jited_prog_len && ulen) { uinsns = u64_to_user_ptr(info->jited_prog_insns); ulen = min_t(u32, info->jited_prog_len, ulen); if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) { diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 23b0d5cfd47e..88dee4b09f53 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -150,6 +150,14 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) if (fexit_cnt) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; + /* Though the second half of trampoline page is unused a task could be + * preempted in the middle of the first half of trampoline and two + * updates to trampoline would change the code from underneath the + * preempted task. Hence wait for tasks to voluntarily schedule or go + * to userspace. 
+ */ + synchronize_rcu_tasks(); + err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags, fentry, fentry_cnt, fexit, fexit_cnt, @@ -240,6 +248,8 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) goto out; if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) goto out; + /* wait for tasks to get out of trampoline before freeing it */ + synchronize_rcu_tasks(); bpf_jit_free_exec(tr->image); hlist_del(&tr->hlist); kfree(tr); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 735af8f15f95..d7d24285c0d2 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3055,8 +3055,6 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) continue; @@ -3066,6 +3064,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) @@ -3101,11 +3101,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!css) continue; + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); @@ -3392,7 +3392,8 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, if (strcmp(strstrip(buf), "threaded")) return -EINVAL; - cgrp = cgroup_kn_lock_live(of->kn, false); + /* drain dying csses before we re-apply (threaded) subtree control */ + cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENOENT; @@ -3546,21 +3547,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v) static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi; + struct psi_group *psi = cgroup_ino(cgrp) == 1 ? 
&psi_system : &cgrp->psi; return psi_show(seq, psi, PSI_CPU); } @@ -4404,12 +4405,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4467,10 +4472,14 @@ static void css_task_iter_advance(struct css_task_iter *it) else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4489,11 +4498,12 @@ static void css_task_iter_advance(struct css_task_iter *it) goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } @@ -4599,6 +4609,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4614,7 +4627,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4622,10 +4635,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } @@ -5931,11 +5945,14 @@ void cgroup_post_fork(struct task_struct *child) spin_lock_irq(&css_set_lock); - WARN_ON_ONCE(!list_empty(&child->cg_list)); - cset = task_css_set(current); /* current is @child's parent */ - get_css_set(cset); - cset->nr_tasks++; - css_set_move_task(child, NULL, cset, false); + /* init tasks are special, only link regular threads */ + if (likely(child->pid)) { + WARN_ON_ONCE(!list_empty(&child->cg_list)); + cset = task_css_set(current); /* current is @child's parent */ + get_css_set(cset); + cset->nr_tasks++; + css_set_move_task(child, NULL, cset, false); + } /* * If the cgroup has to be frozen, the new task has too. Let's set @@ -6259,6 +6276,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. 
*/ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 58f5073acff7..9301e25986d3 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -632,7 +632,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) return ret; } -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_BMQ) /* * Helper routine for generate_sched_domains(). * Do cpusets a, b have overlapping effective cpus_allowed masks? @@ -1005,7 +1005,7 @@ static void rebuild_sched_domains_locked(void) /* Have scheduler rebuild the domains */ partition_and_rebuild_sched_domains(ndoms, doms, attr); } -#else /* !CONFIG_SMP */ +#else /* !CONFIG_SMP || CONFIG_SCHED_BMQ */ static void rebuild_sched_domains_locked(void) { } diff --git a/kernel/cpu.c b/kernel/cpu.c index 4dc279ed3b2d..9c706af713fb 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -525,8 +525,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; - /* Unpark the stopper thread and the hotplug thread of the target cpu */ - stop_machine_unpark(cpu); + /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* @@ -1089,8 +1088,8 @@ void notify_cpu_starting(unsigned int cpu) /* * Called from the idle task. Wake up the controlling task which brings the - * stopper and the hotplug thread of the upcoming CPU up and then delegates - * the rest of the online bringup to the hotplug thread. + * hotplug thread of the upcoming CPU up and then delegates the rest of the + * online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { @@ -1100,6 +1099,12 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + /* + * Unpark the stopper thread before we start the idle loop (and start + * scheduling); this ensures the stopper task is always available. + */ + stop_machine_unpark(smp_processor_id()); + st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 27725754ac99..769d773c7182 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -106,7 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) */ t1 = tsk->sched_info.pcount; t2 = tsk->sched_info.run_delay; - t3 = tsk->se.sum_exec_runtime; + t3 = tsk_seruntime(tsk); d->cpu_count += t1; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 6af7ae83c4ad..32ec69cdba54 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -472,28 +472,26 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, } #endif /* CONFIG_MMU */ -/* - * Because 32-bit DMA masks are so common we expect every architecture to be - * able to satisfy them - either by not supporting more physical memory, or by - * providing a ZONE_DMA32. If neither is the case, the architecture needs to - * use an IOMMU instead of the direct mapping. - */ int dma_direct_supported(struct device *dev, u64 mask) { - u64 min_mask; - - if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = DMA_BIT_MASK(zone_dma_bits); - else - min_mask = DMA_BIT_MASK(32); + u64 min_mask = (max_pfn - 1) << PAGE_SHIFT; - min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT); + /* + * Because 32-bit DMA masks are so common we expect every architecture + * to be able to satisfy them - either by not supporting more physical + * memory, or by providing a ZONE_DMA32.
If neither is the case, the + * architecture needs to use an IOMMU instead of the direct mapping. + */ + if (mask >= DMA_BIT_MASK(32)) + return 1; /* * This check needs to be against the actual bit mask value, so * use __phys_to_dma() here so that the SME encryption mask isn't * part of the check. */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); return mask >= __phys_to_dma(dev, min_mask); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 2173c23c25b4..fdb7f7ef380c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -951,9 +951,9 @@ list_update_cgroup_event(struct perf_event *event, /* * Because cgroup events are always per-cpu events, - * this will always be called from the right CPU. + * @ctx == &cpuctx->ctx. */ - cpuctx = __get_cpu_context(ctx); + cpuctx = container_of(ctx, struct perf_cpu_context, ctx); /* * Since setting cpuctx->cgrp is conditional on the current @cgrp @@ -979,7 +979,8 @@ list_update_cgroup_event(struct perf_event *event, cpuctx_entry = &cpuctx->cgrp_cpuctx_entry; if (add) - list_add(cpuctx_entry, this_cpu_ptr(&cgrp_cpuctx_list)); + list_add(cpuctx_entry, + per_cpu_ptr(&cgrp_cpuctx_list, event->cpu)); else list_del(cpuctx_entry); } @@ -5916,7 +5917,15 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) */ user_lock_limit *= num_online_cpus(); - user_locked = atomic_long_read(&user->locked_vm) + user_extra; + user_locked = atomic_long_read(&user->locked_vm); + + /* + * sysctl_perf_event_mlock may have changed, so that + * user->locked_vm > user_lock_limit + */ + if (user_locked > user_lock_limit) + user_locked = user_lock_limit; + user_locked += user_extra; if (user_locked > user_lock_limit) { /* diff --git a/kernel/exit.c b/kernel/exit.c index 2833ffb0c211..37a1f8d73eee 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -131,7 +131,7 @@ static void __exit_signal(struct task_struct *tsk) sig->curr_target = next_thread(tsk); } - add_device_randomness((const void*) &tsk->se.sum_exec_runtime, + add_device_randomness((const void*) &tsk_seruntime(tsk), sizeof(unsigned long long)); /* @@ -152,7 +152,7 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); - sig->sum_sched_runtime += tsk->se.sum_exec_runtime; + sig->sum_sched_runtime += tsk_seruntime(tsk); sig->nr_threads--; __unhash_process(tsk, group_dead); write_sequnlock(&sig->stats_lock); diff --git a/kernel/fork.c b/kernel/fork.c index 080809560072..1cb7b827b57b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,11 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#else +#define unprivileged_userns_clone 0 +#endif /* * Minimum number of threads to boot the kernel @@ -1843,6 +1848,10 @@ static __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. 
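
The CLONE_NEWUSER gates added to copy_process() here and to ksys_unshare() below mirror the distro "unprivileged_userns_clone" knob. A userspace probe of the behaviour might look like this (a sketch; the sysctl name comes from this patchset, not mainline):

	/* probe_userns.c - does this kernel allow unprivileged user namespaces? */
	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		if (unshare(CLONE_NEWUSER) == 0)
			puts("user namespace created");
		else
			/* EPERM when kernel.unprivileged_userns_clone = 0 */
			perror("unshare(CLONE_NEWUSER)");
		return 0;
	}
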
@@ -2923,6 +2932,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/futex.c b/kernel/futex.c index 0cf84c8664f2..80c399deddec 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -215,6 +215,8 @@ struct futex_pi_state { * @rt_waiter: rt_waiter storage for use with requeue_pi * @requeue_pi_key: the requeue_pi target futex key * @bitset: bitset for the optional bitmasked wakeup + * @uaddr: userspace address of futex + * @uval: expected futex's value * * We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so * we can wake only the relevant ones (hashed queues may be shared). @@ -237,6 +239,8 @@ struct futex_q { struct rt_mutex_waiter *rt_waiter; union futex_key *requeue_pi_key; u32 bitset; + u32 __user *uaddr; + u32 uval; } __randomize_layout; static const struct futex_q futex_q_init = { @@ -385,9 +389,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) */ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, - (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); + return &futex_queues[hash & (futex_hashsize - 1)]; } @@ -429,7 +433,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - ihold(key->shared.inode); /* implies smp_mb(); (B) */ + smp_mb(); /* explicit smp_mb(); (B) */ break; case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies smp_mb(); (B) */ @@ -463,7 +467,6 @@ static void drop_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - iput(key->shared.inode); break; case FUT_OFF_MMSHARED: mmdrop(key->private.mm); @@ -505,6 +508,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, return timeout; } +/* + * Generate a machine wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the life-time of the machine; which with + * 1ns resolution means almost 585 years. + * + * This further relies on the fact that a well formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false-positive, esp. + * for PI futexes that can mess up the state. The above argues that false-negatives + * are only possible for malformed programs. + */ +static u64 get_inode_sequence_number(struct inode *inode) +{ + static atomic64_t i_seq; + u64 old; + + /* Does the inode already have a sequence number? 
*/ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; + + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; + } +} + /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex @@ -517,9 +560,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, * * The key words are stored in @key on success. * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page). For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + * ( inode->i_sequence, page->index, offset_within_page ) + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + * ( current->mm, address, 0 ) + * + * This allows (cross process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ @@ -659,8 +708,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ - } else { struct inode *inode; @@ -692,40 +739,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a goto again; } - /* - * Take a reference unless it is about to be freed. Previously - * this reference was taken by ihold under the page lock - * pinning the inode in place so i_lock was unnecessary. The - * only way for this check to fail is if the inode was - * truncated in parallel which is almost certainly an - * application bug. In such a case, just retry. - * - * We are not calling into get_futex_key_refs() in file-backed - * cases, therefore a successful atomic_inc return below will - * guarantee that get_futex_key() will still imply smp_mb(); (B). - */ - if (!atomic_inc_not_zero(&inode->i_count)) { - rcu_read_unlock(); - put_page(page); - - goto again; - } - - /* Should be impossible but lets be paranoid for now */ - if (WARN_ON_ONCE(inode->i_mapping != mapping)) { - err = -EFAULT; - rcu_read_unlock(); - iput(inode); - - goto out; - } - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = inode; + key->shared.i_seq = get_inode_sequence_number(inode); key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + out: put_page(page); return err; @@ -2420,6 +2441,29 @@ static int unqueue_me(struct futex_q *q) return ret; } +/** + * unqueue_multiple() - Remove several futexes from their futex_hash_bucket + * @q: The list of futexes to unqueue + * @count: Number of futexes in the list + * + * Helper to unqueue a list of futexes. This can't fail. + * + * Return: + * - >=0 - Index of the last futex that was awoken; + * - -1 - If no futex was awoken + */ +static int unqueue_multiple(struct futex_q *q, int count) +{ + int ret = -1; + int i; + + for (i = 0; i < count; i++) { + if (!unqueue_me(&q[i])) + ret = i; + } + return ret; +} + /* * PI futexes can not be requeued and must remove themself from the * hash bucket. The hash bucket lock (i.e. 
lock_ptr) is held on entry
 * and must not be released on exit.
 */
@@ -2783,6 +2827,211 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 	return ret;
 }
 
+/**
+ * futex_wait_multiple_setup() - Prepare to wait and enqueue multiple futexes
+ * @qs:		The corresponding futex list
+ * @count:	The size of the lists
+ * @flags:	Futex flags (FLAGS_SHARED, etc.)
+ * @awaken:	Index of the last woken futex
+ *
+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
+ * the futex list is invalid or if any futex was already woken. On success, the
+ * task is ready for interruptible sleep.
+ *
+ * Return:
+ *  -  1 - One of the futexes was woken by another thread
+ *  -  0 - Success
+ *  - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
+ */
+static int futex_wait_multiple_setup(struct futex_q *qs, int count,
+				     unsigned int flags, int *awaken)
+{
+	struct futex_hash_bucket *hb;
+	int ret, i;
+	u32 uval;
+
+	/*
+	 * Enqueuing multiple futexes is tricky, because we need to
+	 * enqueue each futex in the list before dealing with the next
+	 * one to avoid deadlocking on the hash bucket. But, before
+	 * enqueuing, we need to make sure that current->state is
+	 * TASK_INTERRUPTIBLE, so we don't lose any wakeup events; and
+	 * that cannot be done before the get_futex_key of the next key,
+	 * because get_futex_key calls get_user_pages, which can sleep.
+	 * Thus, we fetch the futex keys in two steps: first pin all the
+	 * user memory backing the keys, and only then read each futex
+	 * word and queue the corresponding futex.
+	 */
+retry:
+	for (i = 0; i < count; i++) {
+		qs[i].key = FUTEX_KEY_INIT;
+		ret = get_futex_key(qs[i].uaddr, flags & FLAGS_SHARED,
+				    &qs[i].key, FUTEX_READ);
+		if (unlikely(ret)) {
+			for (--i; i >= 0; i--)
+				put_futex_key(&qs[i].key);
+			return ret;
+		}
+	}
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	for (i = 0; i < count; i++) {
+		struct futex_q *q = &qs[i];
+
+		hb = queue_lock(q);
+
+		ret = get_futex_value_locked(&uval, q->uaddr);
+		if (ret) {
+			/*
+			 * We need to try to handle the fault, which
+			 * cannot be done without sleeping, so we need to
+			 * undo all the work already done, to make sure
+			 * we don't miss any wakeups. Therefore, clean
+			 * up, handle the fault and retry from the
+			 * beginning.
+			 */
+			queue_unlock(hb);
+
+			/*
+			 * Keys 0..(i-1) are implicitly put
+			 * by unqueue_multiple.
+			 */
+			put_futex_key(&q->key);
+
+			*awaken = unqueue_multiple(qs, i);
+
+			__set_current_state(TASK_RUNNING);
+
+			/*
+			 * On a real fault, prioritize the error even if
+			 * some other futex was woken. Userspace gave
+			 * us a bad address, -EFAULT them.
+			 */
+			ret = get_user(uval, q->uaddr);
+			if (ret)
+				return ret;
+
+			/*
+			 * Even if the page fault was handled, if
+			 * something was already woken we can safely
+			 * give up and return success, as a hint for
+			 * userspace to acquire the right futex faster.
+			 */
+			if (*awaken >= 0)
+				return 1;
+
+			goto retry;
+		}
+
+		if (uval != q->uval) {
+			queue_unlock(hb);
+
+			put_futex_key(&qs[i].key);
+
+			/*
+			 * If something was already woken, we can
+			 * safely ignore the error and succeed.
+			 */
+			*awaken = unqueue_multiple(qs, i);
+			__set_current_state(TASK_RUNNING);
+			if (*awaken >= 0)
+				return 1;
+
+			return -EWOULDBLOCK;
+		}
+
+		/*
+		 * The bucket lock can't be held while dealing with the
+		 * next futex. Queue each futex at this moment so hb can
+		 * be unlocked.
+		 */
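
The set_current_state()/queue ordering used above is the classic prepare-to-wait idiom; a minimal generic sketch (standard kernel pattern, not code from this patch):

	set_current_state(TASK_INTERRUPTIBLE);	/* publish "about to sleep" */
	if (!should_wake)			/* hypothetical wait condition */
		schedule();			/* a wakeup issued after the state
						 * was published is not lost: it
						 * flips us back to TASK_RUNNING */
	__set_current_state(TASK_RUNNING);
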
+		queue_me(&qs[i], hb);
+	}
+	return 0;
+}
+
+/**
+ * futex_wait_multiple() - Prepare to wait on and enqueue several futexes
+ * @qs:		The list of futexes to wait on
+ * @op:		Operation code from futex's syscall
+ * @count:	The number of objects
+ * @abs_time:	Timeout before giving up and returning to userspace
+ *
+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
+ * sleeps on a group of futexes and returns on the first futex that
+ * triggered, or after the timeout has elapsed.
+ *
+ * Return:
+ *  - >=0 - Index (a hint) of the futex that was woken
+ *  - <0  - On error
+ */
+static int futex_wait_multiple(struct futex_q *qs, int op,
+			       u32 count, ktime_t *abs_time)
+{
+	struct hrtimer_sleeper timeout, *to;
+	int ret, flags = 0, hint = 0;
+	unsigned int i;
+
+	if (!(op & FUTEX_PRIVATE_FLAG))
+		flags |= FLAGS_SHARED;
+
+	if (op & FUTEX_CLOCK_REALTIME)
+		flags |= FLAGS_CLOCKRT;
+
+	to = futex_setup_timer(abs_time, &timeout, flags, 0);
+	while (1) {
+		ret = futex_wait_multiple_setup(qs, count, flags, &hint);
+		if (ret) {
+			if (ret > 0) {
+				/* A futex was woken during setup */
+				ret = hint;
+			}
+			break;
+		}
+
+		if (to)
+			hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS);
+
+		/*
+		 * Avoid sleeping if another thread already tried to
+		 * wake us.
+		 */
+		for (i = 0; i < count; i++) {
+			if (plist_node_empty(&qs[i].list))
+				break;
+		}
+
+		if (i == count && (!to || to->task))
+			freezable_schedule();
+
+		ret = unqueue_multiple(qs, count);
+
+		__set_current_state(TASK_RUNNING);
+
+		if (ret >= 0)
+			break;
+		if (to && !to->task) {
+			ret = -ETIMEDOUT;
+			break;
+		} else if (signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+		/*
+		 * The final case is a spurious wakeup, in which
+		 * case we just retry.
+		 */
+	}
+
+	if (to) {
+		hrtimer_cancel(&to->timer);
+		destroy_hrtimer_on_stack(&to->timer);
+	}
+
+	return ret;
+}
+
 static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 		      ktime_t *abs_time, u32 bitset)
 {
@@ -3907,6 +4156,43 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	return -ENOSYS;
 }
 
+/**
+ * futex_read_wait_block - Read an array of futex_wait_block from userspace
+ * @uaddr:	Userspace address of the block
+ * @count:	Number of blocks to be read
+ *
+ * This function allocates an array of futex_q (zeroed to initialize its
+ * fields) and then, for each futex_wait_block element from userspace,
+ * fills in a futex_q element with the proper values.
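
For context, the userspace half of what futex_read_wait_block() consumes looks roughly like this (a sketch; the opcode value and the uapi struct live in the include/uapi part of this series and are assumed here):

	#include <stdint.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#define FUTEX_WAIT_MULTIPLE 31	/* assumed value from this patchset */

	struct futex_wait_block {	/* mirrors the assumed uapi layout */
		uint32_t *uaddr;
		uint32_t val;
		uint32_t bitset;
	};

	/* sleep until either futex word is woken; returns the woken index */
	static long wait_any(uint32_t *f1, uint32_t v1, uint32_t *f2, uint32_t v2)
	{
		struct futex_wait_block wb[] = { { f1, v1, ~0u }, { f2, v2, ~0u } };

		/* uaddr: the block array; val: number of blocks; no timeout */
		return syscall(SYS_futex, wb, FUTEX_WAIT_MULTIPLE, 2, NULL, NULL, 0);
	}
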
+ */ +inline struct futex_q *futex_read_wait_block(u32 __user *uaddr, u32 count) +{ + unsigned int i; + struct futex_q *qs; + struct futex_wait_block fwb; + struct futex_wait_block __user *entry = + (struct futex_wait_block __user *)uaddr; + + if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) + return ERR_PTR(-EINVAL); + + qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); + if (!qs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { + kfree(qs); + return ERR_PTR(-EFAULT); + } + + qs[i].uaddr = fwb.uaddr; + qs[i].uval = fwb.val; + qs[i].bitset = fwb.bitset; + } + + return qs; +} SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, @@ -3919,7 +4205,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { + cmd == FUTEX_WAIT_REQUEUE_PI || + cmd == FUTEX_WAIT_MULTIPLE)) { if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; if (get_timespec64(&ts, utime)) @@ -3928,7 +4215,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return -EINVAL; t = timespec64_to_ktime(ts); - if (cmd == FUTEX_WAIT) + if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) t = ktime_add_safe(ktime_get(), t); tp = &t; } @@ -3940,6 +4227,25 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; + if (cmd == FUTEX_WAIT_MULTIPLE) { + int ret; + struct futex_q *qs; + +#ifdef CONFIG_X86_X32 + if (unlikely(in_x32_syscall())) + return -ENOSYS; +#endif + qs = futex_read_wait_block(uaddr, val); + + if (IS_ERR(qs)) + return PTR_ERR(qs); + + ret = futex_wait_multiple(qs, op, val, tp); + kfree(qs); + + return ret; + } + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } @@ -4102,6 +4408,58 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, #endif /* CONFIG_COMPAT */ #ifdef CONFIG_COMPAT_32BIT_TIME +/** + * struct compat_futex_wait_block - Block of futexes to be waited for + * @uaddr: User address of the futex (compatible pointer) + * @val: Futex value expected by userspace + * @bitset: Bitset for the optional bitmasked wakeup + */ +struct compat_futex_wait_block { + compat_uptr_t uaddr; + __u32 pad; + __u32 val; + __u32 bitset; +}; + +/** + * compat_futex_read_wait_block - Read an array of futex_wait_block from + * userspace + * @uaddr: Userspace address of the block + * @count: Number of blocks to be read + * + * This function does the same as futex_read_wait_block(), except that it + * converts the pointer to the futex from the compat version to the regular one. 
+ */ +inline struct futex_q *compat_futex_read_wait_block(u32 __user *uaddr, + u32 count) +{ + unsigned int i; + struct futex_q *qs; + struct compat_futex_wait_block fwb; + struct compat_futex_wait_block __user *entry = + (struct compat_futex_wait_block __user *)uaddr; + + if (!count || count > FUTEX_MULTIPLE_MAX_COUNT) + return ERR_PTR(-EINVAL); + + qs = kcalloc(count, sizeof(*qs), GFP_KERNEL); + if (!qs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { + kfree(qs); + return ERR_PTR(-EFAULT); + } + + qs[i].uaddr = compat_ptr(fwb.uaddr); + qs[i].uval = fwb.val; + qs[i].bitset = fwb.bitset; + } + + return qs; +} + SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) @@ -4113,14 +4471,15 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { + cmd == FUTEX_WAIT_REQUEUE_PI || + cmd == FUTEX_WAIT_MULTIPLE)) { if (get_old_timespec32(&ts, utime)) return -EFAULT; if (!timespec64_valid(&ts)) return -EINVAL; t = timespec64_to_ktime(ts); - if (cmd == FUTEX_WAIT) + if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) t = ktime_add_safe(ktime_get(), t); tp = &t; } @@ -4128,6 +4487,19 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (int) (unsigned long) utime; + if (cmd == FUTEX_WAIT_MULTIPLE) { + int ret; + struct futex_q *qs = compat_futex_read_wait_block(uaddr, val); + + if (IS_ERR(qs)) + return PTR_ERR(qs); + + ret = futex_wait_multiple(qs, op, val, tp); + kfree(qs); + + return ret; + } + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */ diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index 060e8e726755..3941a9c48f83 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,7 +4,7 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS - select CONSTRUCTORS + select CONSTRUCTORS if !UML default n ---help--- This option enables gcov-based code profiling (e.g. for code coverage diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index c1eccd4f6520..a949bd39e343 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -114,6 +114,7 @@ static const struct irq_bit_descr irqdata_states[] = { BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), BIT_MASK_DESCR(IRQD_CAN_RESERVE), + BIT_MASK_DESCR(IRQD_MSI_NOMASK_QUIRK), BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 3924fbe829d4..c9d8eb7f5c02 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -128,8 +128,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq); - extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index dd822fd8a7d5..480df3659720 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1459,6 +1459,7 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) if (rv) { /* Restore the original irq_data. 
 */
 		*root_irq_data = *child_irq_data;
+		kfree(child_irq_data);
 		goto error;
 	}
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1753486b440c..55b080101a20 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -442,23 +442,9 @@ int irq_setup_affinity(struct irq_desc *desc)
 {
 	return irq_select_affinity(irq_desc_get_irq(desc));
 }
-#endif
+#endif /* CONFIG_AUTO_IRQ_AFFINITY */
+#endif /* CONFIG_SMP */
 
-/*
- * Called when a bogus affinity is set via /proc/irq
- */
-int irq_select_affinity_usr(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	unsigned long flags;
-	int ret;
-
-	raw_spin_lock_irqsave(&desc->lock, flags);
-	ret = irq_setup_affinity(desc);
-	raw_spin_unlock_irqrestore(&desc->lock, flags);
-	return ret;
-}
-#endif
 
 /**
  * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index ad26fbcfbfc8..eb95f6106a1e 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -453,8 +453,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 			continue;
 
 		irq_data = irq_domain_get_irq_data(domain, desc->irq);
-		if (!can_reserve)
+		if (!can_reserve) {
 			irqd_clr_can_reserve(irq_data);
+			if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK)
+				irqd_set_msi_nomask_quirk(irq_data);
+		}
 		ret = irq_domain_activate_irq(irq_data, can_reserve);
 		if (ret)
 			goto cleanup;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index cfc4f088a0e7..f5958c55406f 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -111,6 +111,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
 	return show_irq_affinity(AFFINITY_LIST, m);
 }
 
+#ifndef CONFIG_AUTO_IRQ_AFFINITY
+static inline int irq_select_affinity_usr(unsigned int irq)
+{
+	/*
+	 * If the interrupt is started up already then this fails. The
+	 * interrupt is assigned to an online CPU already. There is no
+	 * point in moving it around randomly. Tell user space that the
+	 * selected mask is bogus.
+	 *
+	 * If not, then any change to the affinity is pointless because the
+	 * startup code invokes irq_setup_affinity() which will select
+	 * an online CPU anyway.
+	 */
+	return -EINVAL;
+}
+#else
+/* ALPHA magic affinity auto selector. Keep it for historical reasons. */
+static inline int irq_select_affinity_usr(unsigned int irq)
+{
+	return irq_select_affinity(irq);
+}
+#endif
 
 static ssize_t write_irq_affinity(int type, struct file *file,
 		const char __user *buffer, size_t count, loff_t *pos)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 53534aa258a6..2625c241ac00 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -510,6 +510,8 @@ static void do_unoptimize_kprobes(void)
 	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
 	/* Loop free_list for disarming */
 	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
+		/* Switching from detour code to origin */
+		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 		/* Disarm probes if marked disabled */
 		if (kprobe_disabled(&op->kp))
 			arch_disarm_kprobe(&op->kp);
@@ -610,6 +612,18 @@ void wait_for_kprobe_optimizer(void)
 	mutex_unlock(&kprobe_mutex);
 }
 
+static bool optprobe_queued_unopt(struct optimized_kprobe *op)
+{
+	struct optimized_kprobe *_op;
+
+	list_for_each_entry(_op, &unoptimizing_list, list) {
+		if (op == _op)
+			return true;
+	}
+
+	return false;
+}
+
 /* Optimize kprobe if p is ready to be optimized */
 static void optimize_kprobe(struct kprobe *p)
 {
@@ -631,17 +645,21 @@ static void optimize_kprobe(struct kprobe *p)
 		return;
 
 	/* Check if it is already optimized.
*/ - if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) + if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) { + if (optprobe_queued_unopt(op)) { + /* This is under unoptimizing. Just dequeue the probe */ + list_del_init(&op->list); + } return; + } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) - /* This is under unoptimizing. Just dequeue the probe */ - list_del_init(&op->list); - else { - list_add(&op->list, &optimizing_list); - kick_kprobe_optimizer(); - } + /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ + if (WARN_ON_ONCE(!list_empty(&op->list))) + return; + + list_add(&op->list, &optimizing_list); + kick_kprobe_optimizer(); } /* Short cut to direct unoptimizing */ @@ -649,6 +667,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -662,31 +681,33 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; /* This is not an optprobe nor optimized */ op = container_of(p, struct optimized_kprobe, kp); - if (!kprobe_optimized(p)) { - /* Unoptimized or unoptimizing case */ - if (force && !list_empty(&op->list)) { - /* - * Only if this is unoptimizing kprobe and forced, - * forcibly unoptimize it. (No need to unoptimize - * unoptimized kprobe again :) - */ - list_del_init(&op->list); - force_unoptimize_kprobe(op); - } + if (!kprobe_optimized(p)) return; - } - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (!list_empty(&op->list)) { - /* Dequeue from the optimization queue */ - list_del_init(&op->list); + if (optprobe_queued_unopt(op)) { + /* Queued in unoptimizing queue */ + if (force) { + /* + * Forcibly unoptimize the kprobe here, and queue it + * in the freeing list for release afterwards. 
+ */ + force_unoptimize_kprobe(op); + list_move(&op->list, &freeing_list); + } + } else { + /* Dequeue from the optimizing queue */ + list_del_init(&op->list); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; + } return; } + /* Optimized kprobe case */ - if (force) + if (force) { /* Forcibly update the code: this is a special case */ force_unoptimize_kprobe(op); - else { + } else { list_add(&op->list, &unoptimizing_list); kick_kprobe_optimizer(); } diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index f6310f848f34..3ad290e9fed8 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -306,7 +306,11 @@ static bool klp_try_switch_task(struct task_struct *task) */ rq = task_rq_lock(task, &flags); +#ifdef CONFIG_SCHED_BMQ + if (task_running(task) && task != current) { +#else if (task_running(rq, task) && task != current) { +#endif snprintf(err_buf, STACK_ERR_BUF_SIZE, "%s: %s:%d is running\n", __func__, task->comm, task->pid); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index dadb7b7fba37..9bb6d2497b04 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -286,9 +286,9 @@ static int lockdep_stats_show(struct seq_file *m, void *v) seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n", nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES); #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) - seq_printf(m, " number of stack traces: %llu\n", + seq_printf(m, " number of stack traces: %11llu\n", lockdep_stack_trace_count()); - seq_printf(m, " number of stack hash chains: %llu\n", + seq_printf(m, " number of stack hash chains: %11llu\n", lockdep_stack_hash_count()); #endif seq_printf(m, " combined max dependencies: %11u\n", diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 851bbb10819d..019fdab7e329 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -229,7 +229,7 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, * Only use with rt_mutex_waiter_{less,equal}() */ #define task_to_waiter(p) \ - &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = __tsk_deadline(p) } static inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, @@ -680,7 +680,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * the values of the node being removed. 
*/ waiter->prio = task->prio; - waiter->deadline = task->dl.deadline; + waiter->deadline = __tsk_deadline(task); rt_mutex_enqueue(lock, waiter); @@ -953,7 +953,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, waiter->task = task; waiter->lock = lock; waiter->prio = task->prio; - waiter->deadline = task->dl.deadline; + waiter->deadline = __tsk_deadline(task); /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) diff --git a/kernel/module.c b/kernel/module.c index b56f3224b161..4810ce0fbbca 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -214,7 +214,8 @@ static struct module *mod_find(unsigned long addr) { struct module *mod; - list_for_each_entry_rcu(mod, &modules, list) { + list_for_each_entry_rcu(mod, &modules, list, + lockdep_is_held(&module_mutex)) { if (within_module(addr, mod)) return mod; } @@ -448,7 +449,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) return true; - list_for_each_entry_rcu(mod, &modules, list) { + list_for_each_entry_rcu(mod, &modules, list, + lockdep_is_held(&module_mutex)) { struct symsearch arr[] = { { mod->syms, mod->syms + mod->num_syms, mod->crcs, NOT_GPL_ONLY, false }, @@ -616,7 +618,8 @@ static struct module *find_module_all(const char *name, size_t len, module_assert_mutex_or_preempt(); - list_for_each_entry_rcu(mod, &modules, list) { + list_for_each_entry_rcu(mod, &modules, list, + lockdep_is_held(&module_mutex)) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) continue; if (strlen(mod->name) == len && !memcmp(mod->name, name, len)) @@ -1781,6 +1784,8 @@ static int module_add_modinfo_attrs(struct module *mod) error_out: if (i > 0) module_remove_modinfo_attrs(mod, --i); + else + kfree(mod->modinfo_attrs); return error; } @@ -3054,9 +3059,7 @@ static int setup_load_info(struct load_info *info, int flags) /* Try to find a name early so we can log errors with a module name */ info->index.info = find_sec(info, ".modinfo"); - if (!info->index.info) - info->name = "(missing .modinfo section)"; - else + if (info->index.info) info->name = get_modinfo(info, "name"); /* Find internal symbols and strings. */ @@ -3071,14 +3074,15 @@ static int setup_load_info(struct load_info *info, int flags) } if (info->index.sym == 0) { - pr_warn("%s: module has no symbols (stripped?)\n", info->name); + pr_warn("%s: module has no symbols (stripped?)\n", + info->name ?: "(missing .modinfo section or name field)"); return -ENOEXEC; } info->index.mod = find_sec(info, ".gnu.linkonce.this_module"); if (!info->index.mod) { pr_warn("%s: No module found in object\n", - info->name ?: "(missing .modinfo name field)"); + info->name ?: "(missing .modinfo section or name field)"); return -ENOEXEC; } /* This is temporary: point mod into copy of data. 
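
The extra argument threaded through the list_for_each_entry_rcu() calls in the module.c hunks above is the optional lockdep expression the iterator accepts: it documents that the traversal is safe either under rcu_read_lock() or with the named lock held. Generically (hypothetical list and mutex, not from this patch):

	/* OK under rcu_read_lock() or with my_mutex held; lockdep checks both */
	list_for_each_entry_rcu(pos, &my_list, node, lockdep_is_held(&my_mutex)) {
		if (pos->id == wanted_id)
			break;
	}
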
 */
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 63d7501ac638..5989bbb93039 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die);
 
 int register_die_notifier(struct notifier_block *nb)
 {
-	vmalloc_sync_all();
+	vmalloc_sync_mappings();
 	return atomic_notifier_chain_register(&die_chain, nb);
 }
 EXPORT_SYMBOL_GPL(register_die_notifier);
diff --git a/kernel/padata.c b/kernel/padata.c
index c3fec1413295..fda7a7039422 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -35,6 +35,8 @@
 
 #define MAX_OBJ_NUM 1000
 
+static void padata_free_pd(struct parallel_data *pd);
+
 static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 {
 	int cpu, target_cpu;
@@ -87,7 +89,7 @@ static void padata_parallel_worker(struct work_struct *parallel_work)
 /**
  * padata_do_parallel - padata parallelization function
  *
- * @pinst: padata instance
+ * @ps: padata shell
  * @padata: object to be parallelized
  * @cb_cpu: pointer to the CPU that the serialization callback function should
  *          run on.  If it's not in the serial cpumask of @pinst
@@ -98,16 +100,17 @@ static void padata_parallel_worker(struct work_struct *parallel_work)
  * Note: Every object which is parallelized by padata_do_parallel
  * must be seen by padata_do_serial.
 */
-int padata_do_parallel(struct padata_instance *pinst,
+int padata_do_parallel(struct padata_shell *ps,
 		       struct padata_priv *padata, int *cb_cpu)
 {
+	struct padata_instance *pinst = ps->pinst;
 	int i, cpu, cpu_index, target_cpu, err;
 	struct padata_parallel_queue *queue;
 	struct parallel_data *pd;
 
 	rcu_read_lock_bh();
 
-	pd = rcu_dereference_bh(pinst->pd);
+	pd = rcu_dereference_bh(ps->pd);
 
 	err = -EINVAL;
 	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
@@ -210,10 +213,10 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd,
 
 static void padata_reorder(struct parallel_data *pd)
 {
+	struct padata_instance *pinst = pd->ps->pinst;
 	int cb_cpu;
 	struct padata_priv *padata;
 	struct padata_serial_queue *squeue;
-	struct padata_instance *pinst = pd->pinst;
 	struct padata_parallel_queue *next_queue;
 
 	/*
@@ -283,6 +286,7 @@ static void padata_serial_worker(struct work_struct *serial_work)
 	struct padata_serial_queue *squeue;
 	struct parallel_data *pd;
 	LIST_HEAD(local_list);
+	int cnt;
 
 	local_bh_disable();
 	squeue = container_of(serial_work, struct padata_serial_queue, work);
@@ -292,6 +296,8 @@ static void padata_serial_worker(struct work_struct *serial_work)
 	list_replace_init(&squeue->serial.list, &local_list);
 	spin_unlock(&squeue->serial.lock);
 
+	cnt = 0;
+
 	while (!list_empty(&local_list)) {
 		struct padata_priv *padata;
 
@@ -301,9 +307,12 @@ static void padata_serial_worker(struct work_struct *serial_work)
 		list_del_init(&padata->list);
 
 		padata->serial(padata);
-		atomic_dec(&pd->refcnt);
+		cnt++;
 	}
 	local_bh_enable();
+
+	if (atomic_sub_and_test(cnt, &pd->refcnt))
+		padata_free_pd(pd);
 }
 
 /**
@@ -341,36 +350,39 @@ void padata_do_serial(struct padata_priv *padata)
 }
 EXPORT_SYMBOL(padata_do_serial);
 
-static int padata_setup_cpumasks(struct parallel_data *pd,
-				 const struct cpumask *pcpumask,
-				 const struct cpumask *cbcpumask)
+static int padata_setup_cpumasks(struct padata_instance *pinst)
 {
 	struct workqueue_attrs *attrs;
+	int err;
+
+	attrs = alloc_workqueue_attrs();
+	if (!attrs)
+		return -ENOMEM;
+
+	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
*/ + cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu); + err = apply_workqueue_attrs(pinst->parallel_wq, attrs); + free_workqueue_attrs(attrs); + + return err; +} + +static int pd_setup_cpumasks(struct parallel_data *pd, + const struct cpumask *pcpumask, + const struct cpumask *cbcpumask) +{ int err = -ENOMEM; if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) goto out; - cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); - if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) goto free_pcpu_mask; - cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); - - attrs = alloc_workqueue_attrs(); - if (!attrs) - goto free_cbcpu_mask; - /* Restrict parallel_wq workers to pd->cpumask.pcpu. */ - cpumask_copy(attrs->cpumask, pd->cpumask.pcpu); - err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs); - free_workqueue_attrs(attrs); - if (err < 0) - goto free_cbcpu_mask; + cpumask_copy(pd->cpumask.pcpu, pcpumask); + cpumask_copy(pd->cpumask.cbcpu, cbcpumask); return 0; -free_cbcpu_mask: - free_cpumask_var(pd->cpumask.cbcpu); free_pcpu_mask: free_cpumask_var(pd->cpumask.pcpu); out: @@ -414,12 +426,16 @@ static void padata_init_pqueues(struct parallel_data *pd) } /* Allocate and initialize the internal cpumask dependend resources. */ -static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, - const struct cpumask *pcpumask, - const struct cpumask *cbcpumask) +static struct parallel_data *padata_alloc_pd(struct padata_shell *ps) { + struct padata_instance *pinst = ps->pinst; + const struct cpumask *cbcpumask; + const struct cpumask *pcpumask; struct parallel_data *pd; + cbcpumask = pinst->rcpumask.cbcpu; + pcpumask = pinst->rcpumask.pcpu; + pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL); if (!pd) goto err; @@ -432,15 +448,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, if (!pd->squeue) goto err_free_pqueue; - pd->pinst = pinst; - if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0) + pd->ps = ps; + if (pd_setup_cpumasks(pd, pcpumask, cbcpumask)) goto err_free_squeue; padata_init_pqueues(pd); padata_init_squeues(pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); spin_lock_init(&pd->lock); pd->cpu = cpumask_first(pd->cpumask.pcpu); INIT_WORK(&pd->reorder_work, invoke_padata_reorder); @@ -466,29 +482,6 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; @@ -502,39 +495,63 @@ static void __padata_stop(struct padata_instance *pinst) pinst->flags &= ~PADATA_INIT; synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); } /* Replace the internal control structure with a new one. 
*/ -static void padata_replace(struct padata_instance *pinst, - struct parallel_data *pd_new) +static int padata_replace_one(struct padata_shell *ps) { - struct parallel_data *pd_old = pinst->pd; - int notification_mask = 0; + struct parallel_data *pd_new; - pinst->flags |= PADATA_RESET; + pd_new = padata_alloc_pd(ps); + if (!pd_new) + return -ENOMEM; - rcu_assign_pointer(pinst->pd, pd_new); + ps->opd = rcu_dereference_protected(ps->pd, 1); + rcu_assign_pointer(ps->pd, pd_new); - synchronize_rcu(); + return 0; +} + +static int padata_replace(struct padata_instance *pinst) +{ + int notification_mask = 0; + struct padata_shell *ps; + int err; - if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu)) + pinst->flags |= PADATA_RESET; + + cpumask_copy(pinst->omask, pinst->rcpumask.pcpu); + cpumask_and(pinst->rcpumask.pcpu, pinst->cpumask.pcpu, + cpu_online_mask); + if (!cpumask_equal(pinst->omask, pinst->rcpumask.pcpu)) notification_mask |= PADATA_CPU_PARALLEL; - if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) + + cpumask_copy(pinst->omask, pinst->rcpumask.cbcpu); + cpumask_and(pinst->rcpumask.cbcpu, pinst->cpumask.cbcpu, + cpu_online_mask); + if (!cpumask_equal(pinst->omask, pinst->rcpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + list_for_each_entry(ps, &pinst->pslist, list) { + err = padata_replace_one(ps); + if (err) + break; + } + + synchronize_rcu(); + + list_for_each_entry_continue_reverse(ps, &pinst->pslist, list) + if (atomic_dec_and_test(&ps->opd->refcnt)) + padata_free_pd(ps->opd); if (notification_mask) blocking_notifier_call_chain(&pinst->cpumask_change_notifier, notification_mask, - &pd_new->cpumask); + &pinst->cpumask); pinst->flags &= ~PADATA_RESET; + + return err; } /** @@ -587,7 +604,7 @@ static int __padata_set_cpumasks(struct padata_instance *pinst, cpumask_var_t cbcpumask) { int valid; - struct parallel_data *pd; + int err; valid = padata_validate_cpumask(pinst, pcpumask); if (!valid) { @@ -600,19 +617,15 @@ static int __padata_set_cpumasks(struct padata_instance *pinst, __padata_stop(pinst); out_replace: - pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); - if (!pd) - return -ENOMEM; - cpumask_copy(pinst->cpumask.pcpu, pcpumask); cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); - padata_replace(pinst, pd); + err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst); if (valid) __padata_start(pinst); - return 0; + return err; } /** @@ -695,46 +708,32 @@ EXPORT_SYMBOL(padata_stop); static int __padata_add_cpu(struct padata_instance *pinst, int cpu) { - struct parallel_data *pd; + int err = 0; if (cpumask_test_cpu(cpu, cpu_online_mask)) { - pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, - pinst->cpumask.cbcpu); - if (!pd) - return -ENOMEM; - - padata_replace(pinst, pd); + err = padata_replace(pinst); if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) && padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) __padata_start(pinst); } - return 0; + return err; } static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) { - struct parallel_data *pd = NULL; - - if (cpumask_test_cpu(cpu, cpu_online_mask)) { + int err = 0; + if (!cpumask_test_cpu(cpu, cpu_online_mask)) { if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) || !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu)) __padata_stop(pinst); - pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, - pinst->cpumask.cbcpu); - if (!pd) - return -ENOMEM; - - padata_replace(pinst, pd); - - cpumask_clear_cpu(cpu, 
pd->cpumask.cbcpu); - cpumask_clear_cpu(cpu, pd->cpumask.pcpu); + err = padata_replace(pinst); } - return 0; + return err; } /** @@ -793,7 +792,7 @@ static int padata_cpu_online(unsigned int cpu, struct hlist_node *node) return ret; } -static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node) +static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node) { struct padata_instance *pinst; int ret; @@ -814,11 +813,16 @@ static enum cpuhp_state hp_online; static void __padata_free(struct padata_instance *pinst) { #ifdef CONFIG_HOTPLUG_CPU + cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD, &pinst->node); cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node); #endif + WARN_ON(!list_empty(&pinst->pslist)); + padata_stop(pinst); - padata_free_pd(pinst->pd); + free_cpumask_var(pinst->omask); + free_cpumask_var(pinst->rcpumask.cbcpu); + free_cpumask_var(pinst->rcpumask.pcpu); free_cpumask_var(pinst->cpumask.pcpu); free_cpumask_var(pinst->cpumask.cbcpu); destroy_workqueue(pinst->serial_wq); @@ -965,7 +969,6 @@ static struct padata_instance *padata_alloc(const char *name, const struct cpumask *cbcpumask) { struct padata_instance *pinst; - struct parallel_data *pd = NULL; pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL); if (!pinst) @@ -993,14 +996,22 @@ static struct padata_instance *padata_alloc(const char *name, !padata_validate_cpumask(pinst, cbcpumask)) goto err_free_masks; - pd = padata_alloc_pd(pinst, pcpumask, cbcpumask); - if (!pd) + if (!alloc_cpumask_var(&pinst->rcpumask.pcpu, GFP_KERNEL)) goto err_free_masks; + if (!alloc_cpumask_var(&pinst->rcpumask.cbcpu, GFP_KERNEL)) + goto err_free_rcpumask_pcpu; + if (!alloc_cpumask_var(&pinst->omask, GFP_KERNEL)) + goto err_free_rcpumask_cbcpu; - rcu_assign_pointer(pinst->pd, pd); + INIT_LIST_HEAD(&pinst->pslist); cpumask_copy(pinst->cpumask.pcpu, pcpumask); cpumask_copy(pinst->cpumask.cbcpu, cbcpumask); + cpumask_and(pinst->rcpumask.pcpu, pcpumask, cpu_online_mask); + cpumask_and(pinst->rcpumask.cbcpu, cbcpumask, cpu_online_mask); + + if (padata_setup_cpumasks(pinst)) + goto err_free_omask; pinst->flags = 0; @@ -1010,12 +1021,20 @@ static struct padata_instance *padata_alloc(const char *name, #ifdef CONFIG_HOTPLUG_CPU cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node); + cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD, + &pinst->node); #endif put_online_cpus(); return pinst; +err_free_omask: + free_cpumask_var(pinst->omask); +err_free_rcpumask_cbcpu: + free_cpumask_var(pinst->rcpumask.cbcpu); +err_free_rcpumask_pcpu: + free_cpumask_var(pinst->rcpumask.pcpu); err_free_masks: free_cpumask_var(pinst->cpumask.pcpu); free_cpumask_var(pinst->cpumask.cbcpu); @@ -1054,6 +1073,61 @@ void padata_free(struct padata_instance *pinst) } EXPORT_SYMBOL(padata_free); +/** + * padata_alloc_shell - Allocate and initialize padata shell. + * + * @pinst: Parent padata_instance object. 
+ */ +struct padata_shell *padata_alloc_shell(struct padata_instance *pinst) +{ + struct parallel_data *pd; + struct padata_shell *ps; + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto out; + + ps->pinst = pinst; + + get_online_cpus(); + pd = padata_alloc_pd(ps); + put_online_cpus(); + + if (!pd) + goto out_free_ps; + + mutex_lock(&pinst->lock); + RCU_INIT_POINTER(ps->pd, pd); + list_add(&ps->list, &pinst->pslist); + mutex_unlock(&pinst->lock); + + return ps; + +out_free_ps: + kfree(ps); +out: + return NULL; +} +EXPORT_SYMBOL(padata_alloc_shell); + +/** + * padata_free_shell - free a padata shell + * + * @ps: padata shell to free + */ +void padata_free_shell(struct padata_shell *ps) +{ + struct padata_instance *pinst = ps->pinst; + + mutex_lock(&pinst->lock); + list_del(&ps->list); + padata_free_pd(rcu_dereference_protected(ps->pd, 1)); + mutex_unlock(&pinst->lock); + + kfree(ps); +} +EXPORT_SYMBOL(padata_free_shell); + #ifdef CONFIG_HOTPLUG_CPU static __init int padata_driver_init(void) @@ -1061,17 +1135,24 @@ static __init int padata_driver_init(void) int ret; ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online", - padata_cpu_online, - padata_cpu_prep_down); + padata_cpu_online, NULL); if (ret < 0) return ret; hp_online = ret; + + ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead", + NULL, padata_cpu_dead); + if (ret < 0) { + cpuhp_remove_multi_state(hp_online); + return ret; + } return 0; } module_init(padata_driver_init); static __exit void padata_driver_exit(void) { + cpuhp_remove_multi_state(CPUHP_PADATA_DEAD); cpuhp_remove_multi_state(hp_online); } module_exit(padata_driver_exit); diff --git a/kernel/pid.c b/kernel/pid.c index 2278e249141d..73c507d5ba5e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -247,6 +247,8 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, tmp = tmp->parent; } + retval = -ENOMEM; + if (unlikely(is_child_reaper(pid))) { if (pid_ns_prepare_proc(ns)) goto out_free; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index f3b7239f1892..27f149f5d4a9 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -131,11 +131,12 @@ static void s2idle_loop(void) * to avoid them upfront. */ for (;;) { - if (s2idle_ops && s2idle_ops->wake) - s2idle_ops->wake(); - - if (pm_wakeup_pending()) + if (s2idle_ops && s2idle_ops->wake) { + if (s2idle_ops->wake()) + break; + } else if (pm_wakeup_pending()) { break; + } pm_wakeup_clear(false); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 1ef6f75d92f1..fada22dc4ab6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2770,8 +2770,6 @@ void register_console(struct console *newcon) * for us. */ logbuf_lock_irqsave(flags); - console_seq = syslog_seq; - console_idx = syslog_idx; /* * We're about to replay the log buffer. 
Only do this to the * just-registered console to avoid excessive message spam to @@ -2783,6 +2781,8 @@ void register_console(struct console *newcon) */ exclusive_console = newcon; exclusive_console_stop_seq = console_seq; + console_seq = syslog_seq; + console_idx = syslog_idx; logbuf_unlock_irqrestore(flags); } console_unlock(); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 5dffade2d7cd..21acdff3bd27 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -530,7 +530,7 @@ static void srcu_gp_end(struct srcu_struct *ssp) idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); cbdelay = srcu_get_delay(ssp); - ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns()); rcu_seq_end(&ssp->srcu_gp_seq); gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) @@ -762,6 +762,7 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) unsigned long flags; struct srcu_data *sdp; unsigned long t; + unsigned long tlast; /* If the local srcu_data structure has callbacks, not idle. */ local_irq_save(flags); @@ -780,9 +781,9 @@ static bool srcu_might_be_idle(struct srcu_struct *ssp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); + tlast = READ_ONCE(ssp->srcu_last_gp_end); if (exp_holdoff == 0 || - time_in_range_open(t, ssp->srcu_last_gp_end, - ssp->srcu_last_gp_end + exp_holdoff)) + time_in_range_open(t, tlast, tlast + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1694a6b57ad8..6145e08a1407 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -577,7 +577,7 @@ static void rcu_eqs_enter(bool user) } lockdep_assert_irqs_disabled(); - trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks); + trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); rdp = this_cpu_ptr(&rcu_data); do_nocb_deferred_wakeup(rdp); @@ -650,14 +650,15 @@ static __always_inline void rcu_nmi_exit_common(bool irq) * leave it in non-RCU-idle state. */ if (rdp->dynticks_nmi_nesting != 1) { - trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks); + trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, + atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */ rdp->dynticks_nmi_nesting - 2); return; } /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ - trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks); + trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ if (irq) @@ -744,7 +745,7 @@ static void rcu_eqs_exit(bool user) rcu_dynticks_task_exit(); rcu_dynticks_eqs_exit(); rcu_cleanup_after_idle(); - trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks); + trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); WRITE_ONCE(rdp->dynticks_nesting, 1); WARN_ON_ONCE(rdp->dynticks_nmi_nesting); @@ -833,7 +834,7 @@ static __always_inline void rcu_nmi_enter_common(bool irq) } trace_rcu_dyntick(incby == 1 ? 
TPS("Endirq") : TPS("++="), rdp->dynticks_nmi_nesting, - rdp->dynticks_nmi_nesting + incby, rdp->dynticks); + rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks)); WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */ rdp->dynticks_nmi_nesting + incby); barrier(); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index d632cd019597..df90d4d7ad2e 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -134,7 +134,7 @@ static void __maybe_unused sync_exp_reset_tree(void) rcu_for_each_node_breadth_first(rnp) { raw_spin_lock_irqsave_rcu_node(rnp, flags); WARN_ON_ONCE(rnp->expmask); - rnp->expmask = rnp->expmaskinit; + WRITE_ONCE(rnp->expmask, rnp->expmaskinit); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } } @@ -211,7 +211,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, rnp = rnp->parent; raw_spin_lock_rcu_node(rnp); /* irqs already disabled */ WARN_ON_ONCE(!(rnp->expmask & mask)); - rnp->expmask &= ~mask; + WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); } } @@ -241,7 +241,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); return; } - rnp->expmask &= ~mask; + WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask); __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */ } @@ -372,12 +372,10 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); /* IPI the remaining CPUs for expedited quiescent state. */ - for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { + for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) { unsigned long mask = leaf_node_cpu_bit(rnp, cpu); struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); - if (!(mask_ofl_ipi & mask)) - continue; retry_ipi: if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) { mask_ofl_test |= mask; @@ -491,7 +489,7 @@ static void synchronize_sched_expedited_wait(void) struct rcu_data *rdp; mask = leaf_node_cpu_bit(rnp, cpu); - if (!(rnp->expmask & mask)) + if (!(READ_ONCE(rnp->expmask) & mask)) continue; ndetected++; rdp = per_cpu_ptr(&rcu_data, cpu); @@ -503,7 +501,8 @@ static void synchronize_sched_expedited_wait(void) } pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", jiffies - jiffies_start, rcu_state.expedited_sequence, - rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); + READ_ONCE(rnp_root->expmask), + ".T"[!!rnp_root->exp_tasks]); if (ndetected) { pr_err("blocking rcu_node structures:"); rcu_for_each_node_breadth_first(rnp) { @@ -513,7 +512,7 @@ static void synchronize_sched_expedited_wait(void) continue; pr_cont(" l=%u:%d-%d:%#lx/%c", rnp->level, rnp->grplo, rnp->grphi, - rnp->expmask, + READ_ONCE(rnp->expmask), ".T"[!!rnp->exp_tasks]); } pr_cont("\n"); @@ -521,7 +520,7 @@ static void synchronize_sched_expedited_wait(void) rcu_for_each_leaf_node(rnp) { for_each_leaf_node_possible_cpu(rnp, cpu) { mask = leaf_node_cpu_bit(rnp, cpu); - if (!(rnp->expmask & mask)) + if (!(READ_ONCE(rnp->expmask) & mask)) continue; dump_cpu_task(cpu); } @@ -541,14 +540,13 @@ static void rcu_exp_wait_wake(unsigned long s) struct rcu_node *rnp; synchronize_sched_expedited_wait(); - rcu_exp_gp_seq_end(); - trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end")); - /* - * Switch over to wakeup mode, allowing the next GP, but -only- the - * next GP, to proceed. - */ + // Switch over to wakeup mode, allowing the next GP to proceed. + // End the previous grace period only after acquiring the mutex + // to ensure that only one GP runs concurrently with wakeups. 
mutex_lock(&rcu_state.exp_wake_mutex); + rcu_exp_gp_seq_end(); + trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end")); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { @@ -559,7 +557,7 @@ static void rcu_exp_wait_wake(unsigned long s) spin_unlock(&rnp->exp_lock); } smp_mb(); /* All above changes before wakeup. */ - wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rcu_state.expedited_sequence) & 0x3]); + wake_up_all(&rnp->exp_wq[rcu_seq_ctr(s) & 0x3]); } trace_rcu_exp_grace_period(rcu_state.name, s, TPS("endwake")); mutex_unlock(&rcu_state.exp_wake_mutex); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index fa08d55f7040..f7118842a2b8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -220,7 +220,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp) * blocked tasks. */ if (!rnp->gp_tasks && (blkd_state & RCU_GP_BLKD)) { - rnp->gp_tasks = &t->rcu_node_entry; + WRITE_ONCE(rnp->gp_tasks, &t->rcu_node_entry); WARN_ON_ONCE(rnp->completedqs == rnp->gp_seq); } if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD)) @@ -340,7 +340,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); */ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) { - return rnp->gp_tasks != NULL; + return READ_ONCE(rnp->gp_tasks) != NULL; } /* Bias and limit values for ->rcu_read_lock_nesting. */ @@ -493,7 +493,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) trace_rcu_unlock_preempted_task(TPS("rcu_preempt"), rnp->gp_seq, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) - rnp->gp_tasks = np; + WRITE_ONCE(rnp->gp_tasks, np); if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; if (IS_ENABLED(CONFIG_RCU_BOOST)) { @@ -612,7 +612,7 @@ static void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.exp_hint = false; exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) || - (rdp->grpmask & rnp->expmask) || + (rdp->grpmask & READ_ONCE(rnp->expmask)) || tick_nohz_full_cpu(rdp->cpu); // Need to defer quiescent state until everything is enabled. if (irqs_were_disabled && use_softirq && @@ -663,7 +663,7 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) dump_blkd_tasks(rnp, 10); if (rcu_preempt_has_tasks(rnp) && (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { - rnp->gp_tasks = rnp->blkd_tasks.next; + WRITE_ONCE(rnp->gp_tasks, rnp->blkd_tasks.next); t = container_of(rnp->gp_tasks, struct task_struct, rcu_node_entry); trace_rcu_unlock_preempted_task(TPS("rcu_preempt-GPS"), @@ -757,7 +757,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx\n", __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext); pr_info("%s: ->gp_tasks %p ->boost_tasks %p ->exp_tasks %p\n", - __func__, rnp->gp_tasks, rnp->boost_tasks, rnp->exp_tasks); + __func__, READ_ONCE(rnp->gp_tasks), rnp->boost_tasks, + rnp->exp_tasks); pr_info("%s: ->blkd_tasks", __func__); i = 0; list_for_each(lhp, &rnp->blkd_tasks) { @@ -2321,6 +2322,8 @@ static void __init rcu_organize_nocb_kthreads(void) { int cpu; bool firsttime = true; + bool gotnocbs = false; + bool gotnocbscbs = true; int ls = rcu_nocb_gp_stride; int nl = 0; /* Next GP kthread. */ struct rcu_data *rdp; @@ -2343,21 +2346,31 @@ static void __init rcu_organize_nocb_kthreads(void) rdp = per_cpu_ptr(&rcu_data, cpu); if (rdp->cpu >= nl) { /* New GP kthread, set up for CBs & next GP. 
*/ + gotnocbs = true; nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; rdp->nocb_gp_rdp = rdp; rdp_gp = rdp; - if (!firsttime && dump_tree) - pr_cont("\n"); - firsttime = false; - pr_alert("%s: No-CB GP kthread CPU %d:", __func__, cpu); + if (dump_tree) { + if (!firsttime) + pr_cont("%s\n", gotnocbscbs + ? "" : " (self only)"); + gotnocbscbs = false; + firsttime = false; + pr_alert("%s: No-CB GP kthread CPU %d:", + __func__, cpu); + } } else { /* Another CB kthread, link to previous GP kthread. */ + gotnocbscbs = true; rdp->nocb_gp_rdp = rdp_gp; rdp_prev->nocb_next_cb_rdp = rdp; - pr_alert(" %d", cpu); + if (dump_tree) + pr_cont(" %d", cpu); } rdp_prev = rdp; } + if (gotnocbs && dump_tree) + pr_cont("%s\n", gotnocbscbs ? "" : " (self only)"); } /* diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5662b5..ac31239aa51a 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -16,14 +16,20 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer endif -obj-y += core.o loadavg.o clock.o cputime.o -obj-y += idle.o fair.o rt.o deadline.o -obj-y += wait.o wait_bit.o swait.o completion.o - -obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o +ifdef CONFIG_SCHED_BMQ +obj-y += bmq.o bmq_debug.o +else +obj-y += core.o +obj-y += fair.o rt.o deadline.o +obj-$(CONFIG_SMP) += cpudeadline.o stop_task.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o -obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o +endif +obj-y += loadavg.o clock.o cputime.o +obj-y += idle.o +obj-y += wait.o wait_bit.o swait.o completion.o +obj-$(CONFIG_SMP) += cpupri.o pelt.o topology.o +obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o diff --git a/kernel/sched/bmq.c b/kernel/sched/bmq.c new file mode 100644 index 000000000000..6a5ab93a30bb --- /dev/null +++ b/kernel/sched/bmq.c @@ -0,0 +1,5999 @@ +/* + * kernel/sched/bmq.c + * + * BMQ Core kernel scheduler code and related syscalls + * + * Copyright (C) 1991-2002 Linus Torvalds + * + * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes + * a whole lot of those previous things. + * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel + * scheduler by Alfred Chen. + * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. 
+ */
+#include "bmq_sched.h"
+
+#include <linux/sched/rt.h>
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include "../workqueue_internal.h"
+#include "../../fs/io-wq.h"
+#include "../smpboot.h"
+
+#include "pelt.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+
+/* rt_prio(prio) defined in include/linux/sched/rt.h */
+#define rt_task(p) rt_prio((p)->prio)
+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR)
+#define task_has_rt_policy(p) (rt_policy((p)->policy))
+
+#define STOP_PRIO (MAX_RT_PRIO - 1)
+
+/* Default time slice is 4ms, can be set via kernel parameter "bmq.timeslice" */
+u64 sched_timeslice_ns __read_mostly = (4 * 1000 * 1000);
+
+static int __init sched_timeslice(char *str)
+{
+	int timeslice_us;
+
+	get_option(&str, &timeslice_us);
+	if (timeslice_us >= 1000)
+		sched_timeslice_ns = timeslice_us * 1000;
+
+	return 0;
+}
+early_param("bmq.timeslice", sched_timeslice);
+
+/* Reschedule if less than this many us left */
+#define RESCHED_NS (100 * 1000)
+
+static inline void print_scheduler_version(void)
+{
+	printk(KERN_INFO "bmq: BMQ CPU Scheduler 5.5-r3 by Alfred Chen.\n");
+}
+
+/**
+ * sched_yield_type - Choose what sort of yield sched_yield will perform.
+ * 0: No yield.
+ * 1: Deboost and requeue task. (default)
+ * 2: Set rq skip task.
+ */
+int sched_yield_type __read_mostly = 1;
+
+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch)
+#define boost_threshold(p) (sched_timeslice_ns >>\
+			    (10 - MAX_PRIORITY_ADJ - (p)->boost_prio))
+
+static inline void boost_task(struct task_struct *p)
+{
+	int limit;
+
+	switch (p->policy) {
+	case SCHED_NORMAL:
+		limit = -MAX_PRIORITY_ADJ;
+		break;
+	case SCHED_BATCH:
+	case SCHED_IDLE:
+		limit = 0;
+		break;
+	default:
+		return;
+	}
+
+	if (p->boost_prio > limit)
+		p->boost_prio--;
+}
+
+static inline void deboost_task(struct task_struct *p)
+{
+	if (p->boost_prio < MAX_PRIORITY_ADJ)
+		p->boost_prio++;
+}
+
+#ifdef CONFIG_SMP
+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp;
+
+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks);
+DEFINE_PER_CPU(cpumask_t *, sched_cpu_affinity_end_mask);
+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask);
+
+#ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+EXPORT_SYMBOL_GPL(sched_smt_present);
+#endif
+
+/*
+ * Keep a unique ID per domain (we use the first CPU's number in the cpumask of
+ * the domain), this allows us to quickly tell if two CPUs are in the same cache
+ * domain, see cpus_share_cache().
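
The "bmq.timeslice=" handler above takes a value in microseconds and stores nanoseconds, keeping the 4ms default for anything under 1000. A userspace sketch of that conversion, with atoi() standing in for get_option():

#include <stdio.h>
#include <stdlib.h>

static unsigned long long sched_timeslice_ns = 4ULL * 1000 * 1000; /* 4ms */

static void parse_timeslice(const char *str)
{
	int timeslice_us = atoi(str);	/* get_option() stand-in */

	if (timeslice_us >= 1000)	/* values under 1000us are ignored */
		sched_timeslice_ns = (unsigned long long)timeslice_us * 1000;
}

int main(void)
{
	parse_timeslice("500");				/* too small: default kept */
	printf("%llu ns\n", sched_timeslice_ns);	/* 4000000 */
	parse_timeslice("2000");
	printf("%llu ns\n", sched_timeslice_ns);	/* 2000000 */
	return 0;
}
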
+ */ +DEFINE_PER_CPU(int, sd_llc_id); +#endif /* CONFIG_SMP */ + +static DEFINE_MUTEX(sched_hotcpu_mutex); + +DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); + +#ifndef prepare_arch_switch +# define prepare_arch_switch(next) do { } while (0) +#endif +#ifndef finish_arch_post_lock_switch +# define finish_arch_post_lock_switch() do { } while (0) +#endif + +#define IDLE_WM (IDLE_TASK_SCHED_PRIO) + +static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; +static cpumask_t sched_rq_watermark[bmq_BITS] ____cacheline_aligned_in_smp; + +#if (bmq_BITS <= BITS_PER_LONG) +#define bmq_find_first_bit(bm) __ffs((bm[0])) +#define bmq_find_next_bit(bm, start) __ffs(BITMAP_FIRST_WORD_MASK(start) & bm[0]) +#else +#define bmq_find_first_bit(bm) find_first_bit((bm), bmq_BITS) +#define bmq_find_next_bit(bm, start) find_next_bit(bm, bmq_BITS, start) +#endif + +static inline void update_sched_rq_watermark(struct rq *rq) +{ + unsigned long watermark = bmq_find_first_bit(rq->queue.bitmap); + unsigned long last_wm = rq->watermark; + unsigned long i; + int cpu; + + if (watermark == last_wm) + return; + + rq->watermark = watermark; + cpu = cpu_of(rq); + if (watermark < last_wm) { + for (i = watermark + 1; i <= last_wm; i++) + cpumask_andnot(&sched_rq_watermark[i], + &sched_rq_watermark[i], cpumask_of(cpu)); +#ifdef CONFIG_SCHED_SMT + if (!static_branch_likely(&sched_smt_present)) + return; + if (IDLE_WM == last_wm) + cpumask_andnot(&sched_sg_idle_mask, + &sched_sg_idle_mask, cpu_smt_mask(cpu)); +#endif + return; + } + /* last_wm < watermark */ + for (i = last_wm + 1; i <= watermark; i++) + cpumask_set_cpu(cpu, &sched_rq_watermark[i]); +#ifdef CONFIG_SCHED_SMT + if (!static_branch_likely(&sched_smt_present)) + return; + if (IDLE_WM == watermark) { + cpumask_t tmp; + cpumask_and(&tmp, cpu_smt_mask(cpu), &sched_rq_watermark[IDLE_WM]); + if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) + cpumask_or(&sched_sg_idle_mask, cpu_smt_mask(cpu), + &sched_sg_idle_mask); + } +#endif +} + +static inline int task_sched_prio(struct task_struct *p) +{ + return (p->prio < MAX_RT_PRIO)? 
0:p->prio - MAX_RT_PRIO + p->boost_prio + 1; +} + +static inline void bmq_init(struct bmq *q) +{ + int i; + + bitmap_zero(q->bitmap, bmq_BITS); + for(i = 0; i < bmq_BITS; i++) + INIT_LIST_HEAD(&q->heads[i]); +} + +static inline void bmq_init_idle(struct bmq *q, struct task_struct *idle) +{ + INIT_LIST_HEAD(&q->heads[IDLE_TASK_SCHED_PRIO]); + list_add(&idle->bmq_node, &q->heads[IDLE_TASK_SCHED_PRIO]); + set_bit(IDLE_TASK_SCHED_PRIO, q->bitmap); +} + +static inline void bmq_add_task(struct task_struct *p, struct bmq *q, int idx) +{ + struct list_head *n; + + if (likely(idx)) { + list_add_tail(&p->bmq_node, &q->heads[idx]); + return; + } + + list_for_each(n, &q->heads[idx]) + if (list_entry(n, struct task_struct, bmq_node)->prio > p->prio) + break; + __list_add(&p->bmq_node, n->prev, n); +} + +/* + * This routine used in bmq scheduler only which assume the idle task in the bmq + */ +static inline struct task_struct *rq_first_bmq_task(struct rq *rq) +{ + unsigned long idx = bmq_find_first_bit(rq->queue.bitmap); + const struct list_head *head = &rq->queue.heads[idx]; + + return list_first_entry(head, struct task_struct, bmq_node); +} + +static inline struct task_struct * +rq_next_bmq_task(struct task_struct *p, struct rq *rq) +{ + unsigned long idx = p->bmq_idx; + struct list_head *head = &rq->queue.heads[idx]; + + if (list_is_last(&p->bmq_node, head)) { + idx = bmq_find_next_bit(rq->queue.bitmap, idx + 1); + head = &rq->queue.heads[idx]; + + return list_first_entry(head, struct task_struct, bmq_node); + } + + return list_next_entry(p, bmq_node); +} + +static inline struct task_struct *rq_runnable_task(struct rq *rq) +{ + struct task_struct *next = rq_first_bmq_task(rq); + + if (unlikely(next == rq->skip)) + next = rq_next_bmq_task(next, rq); + + return next; +} + +/* + * Context: p->pi_lock + */ +static inline struct rq +*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) +{ + struct rq *rq; + for (;;) { + rq = task_rq(p); + if (p->on_cpu || task_on_rq_queued(p)) { + raw_spin_lock(&rq->lock); + if (likely((p->on_cpu || task_on_rq_queued(p)) + && rq == task_rq(p))) { + *plock = &rq->lock; + return rq; + } + raw_spin_unlock(&rq->lock); + } else if (task_on_rq_migrating(p)) { + do { + cpu_relax(); + } while (unlikely(task_on_rq_migrating(p))); + } else { + *plock = NULL; + return rq; + } + } +} + +static inline void +__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) +{ + if (NULL != lock) + raw_spin_unlock(lock); +} + +static inline struct rq +*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, + unsigned long *flags) +{ + struct rq *rq; + for (;;) { + rq = task_rq(p); + if (p->on_cpu || task_on_rq_queued(p)) { + raw_spin_lock_irqsave(&rq->lock, *flags); + if (likely((p->on_cpu || task_on_rq_queued(p)) + && rq == task_rq(p))) { + *plock = &rq->lock; + return rq; + } + raw_spin_unlock_irqrestore(&rq->lock, *flags); + } else if (task_on_rq_migrating(p)) { + do { + cpu_relax(); + } while (unlikely(task_on_rq_migrating(p))); + } else { + raw_spin_lock_irqsave(&p->pi_lock, *flags); + if (likely(!p->on_cpu && !p->on_rq && + rq == task_rq(p))) { + *plock = &p->pi_lock; + return rq; + } + raw_spin_unlock_irqrestore(&p->pi_lock, *flags); + } + } +} + +static inline void +task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, + unsigned long *flags) +{ + raw_spin_unlock_irqrestore(lock, *flags); +} + +/* + * __task_rq_lock - lock the rq @p resides on. 
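
rq_first_bmq_task() above reduces task selection to a find-first-set over a bitmap of non-empty priority levels. A compact userspace model of that lookup, with __builtin_ffsll() standing in for the kernel's bit search and a counter standing in for the per-level list heads (names illustrative):

#include <stdio.h>

#define NR_LEVELS 64	/* one bit per priority level, as rq->queue.bitmap */

static unsigned long long bitmap;
static int nr_queued[NR_LEVELS];	/* stand-in for the list heads */

static void enqueue(int level)
{
	nr_queued[level]++;
	bitmap |= 1ULL << level;
}

static int first_level(void)
{
	return __builtin_ffsll(bitmap) - 1;	/* -1 when empty */
}

int main(void)
{
	enqueue(25);	/* nice-0 task */
	enqueue(0);	/* RT task: always maps to index 0 */
	printf("run next from level %d\n", first_level());	/* 0 */
	return 0;
}
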
+ */
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	lockdep_assert_held(&p->pi_lock);
+
+	for (;;) {
+		rq = task_rq(p);
+		raw_spin_lock(&rq->lock);
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+			return rq;
+		raw_spin_unlock(&rq->lock);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
+	}
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(p->pi_lock)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	for (;;) {
+		raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
+		rq = task_rq(p);
+		raw_spin_lock(&rq->lock);
+		/*
+		 * move_queued_task()		task_rq_lock()
+		 *
+		 *	ACQUIRE (rq->lock)
+		 *	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
+		 *	WMB (__set_task_cpu())	ACQUIRE (rq->lock);
+		 *	[S] ->cpu = new_cpu		[L] task_rq()
+		 *					[L] ->on_rq
+		 *	RELEASE (rq->lock)
+		 *
+		 * If we observe the old CPU in task_rq_lock(), the acquire of
+		 * the old rq->lock will fully serialize against the stores.
+		 *
+		 * If we observe the new CPU in task_rq_lock(), the address
+		 * dependency headed by '[L] rq = task_rq()' and the acquire
+		 * will pair with the WMB to ensure we then also see migrating.
+		 */
+		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
+			return rq;
+		}
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
+
+		while (unlikely(task_on_rq_migrating(p)))
+			cpu_relax();
+	}
+}
+
+/*
+ * RQ-clock updating methods:
+ */
+
+static void update_rq_clock_task(struct rq *rq, s64 delta)
+{
+/*
+ * In theory, the compiler should just see 0 here, and optimize out the call
+ * to sched_rt_avg_update. But I don't trust it...
+ */
+	s64 __maybe_unused steal = 0, irq_delta = 0;
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+
+	/*
+	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+	 * this case when a previous update_rq_clock() happened inside a
+	 * {soft,}irq region.
+	 *
+	 * When this happens, we stop ->clock_task and only update the
+	 * prev_irq_time stamp to account for the part that fit, so that a next
+	 * update will consume the rest. This ensures ->clock_task is
+	 * monotonic.
+	 *
+	 * It does however cause some slight misattribution of {soft,}irq
+	 * time, a more accurate solution would be to update the irq_time using
+	 * the current rq->clock timestamp, except that would require using
+	 * atomic ops.
+	 */
+	if (irq_delta > delta)
+		irq_delta = delta;
+
+	rq->prev_irq_time += irq_delta;
+	delta -= irq_delta;
+#endif
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	if (static_key_false((&paravirt_steal_rq_enabled))) {
+		steal = paravirt_steal_clock(cpu_of(rq));
+		steal -= rq->prev_steal_time_rq;
+
+		if (unlikely(steal > delta))
+			steal = delta;
+
+		rq->prev_steal_time_rq += steal;
+		delta -= steal;
+	}
+#endif
+
+	rq->clock_task += delta;
+
+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
+	if ((irq_delta + steal))
+		update_irq_load_avg(rq, irq_delta + steal);
+#endif
+}
+
+static inline void update_rq_clock(struct rq *rq)
+{
+	s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
+
+	if (unlikely(delta <= 0))
+		return;
+	rq->clock += delta;
+	update_rq_clock_task(rq, delta);
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+/*
+ * Tick may be needed by tasks in the runqueue depending on their policy and
+ * requirements. If tick is needed, let's send the target an IPI to kick it out
+ * of nohz mode if necessary.
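
update_rq_clock_task() above carves IRQ time and steal time out of the raw clock delta, clamping each to whatever delta remains so that ->clock_task stays monotonic. The arithmetic, worked in isolation:

#include <stdio.h>

static long long account(long long delta, long long irq_delta, long long steal)
{
	if (irq_delta > delta)		/* cap IRQ time at the available delta */
		irq_delta = delta;
	delta -= irq_delta;

	if (steal > delta)		/* then cap steal time at the remainder */
		steal = delta;
	delta -= steal;

	return delta;			/* what clock_task advances by */
}

int main(void)
{
	printf("%lld\n", account(1000, 300, 200));	/* 500 */
	printf("%lld\n", account(1000, 1200, 500));	/* 0: delta fully consumed */
	return 0;
}
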
+ */ +static inline void sched_update_tick_dependency(struct rq *rq) +{ + int cpu; + + if (!tick_nohz_full_enabled()) + return; + + cpu = cpu_of(rq); + + if (!tick_nohz_full_cpu(cpu)) + return; + + if (rq->nr_running < 2) + tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); + else + tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); +} +#else /* !CONFIG_NO_HZ_FULL */ +static inline void sched_update_tick_dependency(struct rq *rq) { } +#endif + +/* + * Add/Remove/Requeue task to/from the runqueue routines + * Context: rq->lock + */ +static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) +{ + lockdep_assert_held(&rq->lock); + + WARN_ONCE(task_rq(p) != rq, "bmq: dequeue task reside on cpu%d from cpu%d\n", + task_cpu(p), cpu_of(rq)); + + list_del(&p->bmq_node); + if (list_empty(&rq->queue.heads[p->bmq_idx])) { + clear_bit(p->bmq_idx, rq->queue.bitmap); + update_sched_rq_watermark(rq); + } + --rq->nr_running; +#ifdef CONFIG_SMP + if (1 == rq->nr_running) + cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); +#endif + + sched_update_tick_dependency(rq); + psi_dequeue(p, flags & DEQUEUE_SLEEP); + + sched_info_dequeued(rq, p); +} + +static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) +{ + lockdep_assert_held(&rq->lock); + + WARN_ONCE(task_rq(p) != rq, "bmq: enqueue task reside on cpu%d to cpu%d\n", + task_cpu(p), cpu_of(rq)); + + p->bmq_idx = task_sched_prio(p); + bmq_add_task(p, &rq->queue, p->bmq_idx); + set_bit(p->bmq_idx, rq->queue.bitmap); + update_sched_rq_watermark(rq); + ++rq->nr_running; +#ifdef CONFIG_SMP + if (2 == rq->nr_running) + cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); +#endif + + sched_update_tick_dependency(rq); + + sched_info_queued(rq, p); + psi_enqueue(p, flags); + + /* + * If in_iowait is set, the code below may not trigger any cpufreq + * utilization updates, so do it here explicitly with the IOWAIT flag + * passed. + */ + if (p->in_iowait) + cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); +} + +static inline void requeue_task(struct task_struct *p, struct rq *rq) +{ + int idx = task_sched_prio(p); + + lockdep_assert_held(&rq->lock); + WARN_ONCE(task_rq(p) != rq, "bmq: cpu[%d] requeue task reside on cpu%d\n", + cpu_of(rq), task_cpu(p)); + + list_del(&p->bmq_node); + bmq_add_task(p, &rq->queue, idx); + if (idx != p->bmq_idx) { + if (list_empty(&rq->queue.heads[p->bmq_idx])) + clear_bit(p->bmq_idx, rq->queue.bitmap); + p->bmq_idx = idx; + set_bit(p->bmq_idx, rq->queue.bitmap); + update_sched_rq_watermark(rq); + } +} + +/* + * cmpxchg based fetch_or, macro so it works for different integer types + */ +#define fetch_or(ptr, mask) \ + ({ \ + typeof(ptr) _ptr = (ptr); \ + typeof(mask) _mask = (mask); \ + typeof(*_ptr) _old, _val = *_ptr; \ + \ + for (;;) { \ + _old = cmpxchg(_ptr, _val, _val | _mask); \ + if (_old == _val) \ + break; \ + _val = _old; \ + } \ + _old; \ +}) + +#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) +/* + * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, + * this avoids any races wrt polling state changes and thereby avoids + * spurious IPIs. + */ +static bool set_nr_and_not_polling(struct task_struct *p) +{ + struct thread_info *ti = task_thread_info(p); + return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); +} + +/* + * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. + * + * If this returns true, then the idle task promises to call + * sched_ttwu_pending() and reschedule soon. 
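
The fetch_or() macro above is an open-coded atomic fetch-or built from a cmpxchg retry loop. The same loop in userspace C11, where one could also simply call atomic_fetch_or():

#include <stdatomic.h>
#include <stdio.h>

static unsigned long fetch_or_ulong(atomic_ulong *ptr, unsigned long mask)
{
	unsigned long val = atomic_load(ptr);

	/* retry until our OR is applied to an unchanged value */
	while (!atomic_compare_exchange_weak(ptr, &val, val | mask))
		;
	return val;			/* the old value, like the macro */
}

int main(void)
{
	atomic_ulong flags = 0x1;

	printf("old %#lx\n", fetch_or_ulong(&flags, 0x8));	  /* old 0x1 */
	printf("new %#lx\n", (unsigned long)atomic_load(&flags)); /* new 0x9 */
	return 0;
}
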
+ */ +static bool set_nr_if_polling(struct task_struct *p) +{ + struct thread_info *ti = task_thread_info(p); + typeof(ti->flags) old, val = READ_ONCE(ti->flags); + + for (;;) { + if (!(val & _TIF_POLLING_NRFLAG)) + return false; + if (val & _TIF_NEED_RESCHED) + return true; + old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); + if (old == val) + break; + val = old; + } + return true; +} + +#else +static bool set_nr_and_not_polling(struct task_struct *p) +{ + set_tsk_need_resched(p); + return true; +} + +#ifdef CONFIG_SMP +static bool set_nr_if_polling(struct task_struct *p) +{ + return false; +} +#endif +#endif + +static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ + struct wake_q_node *node = &task->wake_q; + + /* + * Atomically grab the task, if ->wake_q is !nil already it means + * its already queued (either by us or someone else) and will get the + * wakeup due to that. + * + * In order to ensure that a pending wakeup will observe our pending + * state, even in the failed case, an explicit smp_mb() must be used. + */ + smp_mb__before_atomic(); + if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) + return false; + + /* + * The head is context local, there can be no concurrency. + */ + *head->lastp = node; + head->lastp = &node->next; + return true; +} + +/** + * wake_q_add() - queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + */ +void wake_q_add(struct wake_q_head *head, struct task_struct *task) +{ + if (__wake_q_add(head, task)) + get_task_struct(task); +} + +/** + * wake_q_add_safe() - safely queue a wakeup for 'later' waking. + * @head: the wake_q_head to add @task to + * @task: the task to queue for 'later' wakeup + * + * Queue a task for later wakeup, most likely by the wake_up_q() call in the + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come + * instantly. + * + * This function must be used as-if it were wake_up_process(); IOW the task + * must be ready to be woken at this location. + * + * This function is essentially a task-safe equivalent to wake_q_add(). Callers + * that already hold reference to @task can call the 'safe' version and trust + * wake_q to do the right thing depending whether or not the @task is already + * queued for wakeup. + */ +void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) +{ + if (!__wake_q_add(head, task)) + put_task_struct(task); +} + +void wake_up_q(struct wake_q_head *head) +{ + struct wake_q_node *node = head->first; + + while (node != WAKE_Q_TAIL) { + struct task_struct *task; + + task = container_of(node, struct task_struct, wake_q); + BUG_ON(!task); + /* task can safely be re-inserted now: */ + node = node->next; + task->wake_q.next = NULL; + + /* + * wake_up_process() executes a full barrier, which pairs with + * the queueing in wake_q_add() so as not to miss wakeups. + */ + wake_up_process(task); + put_task_struct(task); + } +} + +/* + * resched_curr - mark rq's current task 'to be rescheduled now'. + * + * On UP this means the setting of the need_resched flag, on SMP it + * might also involve a cross-CPU call to trigger the scheduler on + * the target CPU. 
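
The wake_q helpers above maintain an intrusive singly linked list through task_struct, with a tail pointer-to-pointer and a sentinel ->next value marking the last entry; any non-NULL ->next means "already queued". A minimal, non-atomic userspace model (the kernel uses cmpxchg for the queued check; names illustrative):

#include <stdio.h>

struct node { struct node *next; int id; };

#define TAIL ((struct node *)0x1)	/* WAKE_Q_TAIL stand-in */

struct whead { struct node *first; struct node **lastp; };

static void q_init(struct whead *h)
{
	h->first = TAIL;
	h->lastp = &h->first;
}

static int q_add(struct whead *h, struct node *n)
{
	if (n->next)			/* already queued: skip */
		return 0;
	n->next = TAIL;
	*h->lastp = n;			/* link after the current tail */
	h->lastp = &n->next;
	return 1;
}

int main(void)
{
	struct whead h;
	struct node a = { .id = 1 }, b = { .id = 2 };

	q_init(&h);
	q_add(&h, &a);
	q_add(&h, &b);
	q_add(&h, &a);			/* duplicate: ignored */
	for (struct node *n = h.first; n != TAIL; n = n->next)
		printf("wake %d\n", n->id);
	return 0;
}
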
+ */ +void resched_curr(struct rq *rq) +{ + struct task_struct *curr = rq->curr; + int cpu; + + lockdep_assert_held(&rq->lock); + + if (test_tsk_need_resched(curr)) + return; + + cpu = cpu_of(rq); + if (cpu == smp_processor_id()) { + set_tsk_need_resched(curr); + set_preempt_need_resched(); + return; + } + + if (set_nr_and_not_polling(curr)) + smp_send_reschedule(cpu); + else + trace_sched_wake_idle_without_ipi(cpu); +} + +void resched_cpu(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + raw_spin_lock_irqsave(&rq->lock, flags); + if (cpu_online(cpu) || cpu == smp_processor_id()) + resched_curr(cpu_rq(cpu)); + raw_spin_unlock_irqrestore(&rq->lock, flags); +} + +#ifdef CONFIG_SMP +#ifdef CONFIG_NO_HZ_COMMON +void nohz_balance_enter_idle(int cpu) +{ +} + +void select_nohz_load_balancer(int stop_tick) +{ +} + +void set_cpu_sd_state_idle(void) {} + +/* + * In the semi idle case, use the nearest busy CPU for migrating timers + * from an idle CPU. This is good for power-savings. + * + * We don't do similar optimization for completely idle system, as + * selecting an idle CPU will add more delays to the timers than intended + * (as that CPU's timer base may not be uptodate wrt jiffies etc). + */ +int get_nohz_timer_target(void) +{ + int i, cpu = smp_processor_id(); + struct cpumask *mask; + + if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) + return cpu; + + for (mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + mask < per_cpu(sched_cpu_affinity_end_mask, cpu); mask++) + for_each_cpu(i, mask) + if (!idle_cpu(i) && housekeeping_cpu(i, HK_FLAG_TIMER)) + return i; + + if (!housekeeping_cpu(cpu, HK_FLAG_TIMER)) + cpu = housekeeping_any_cpu(HK_FLAG_TIMER); + + return cpu; +} + +/* + * When add_timer_on() enqueues a timer into the timer wheel of an + * idle CPU then this timer might expire before the next timer event + * which is scheduled to wake up that CPU. In case of a completely + * idle system the next event might even be infinite time into the + * future. wake_up_idle_cpu() ensures that the CPU is woken up and + * leaves the inner idle loop so the newly added timer is taken into + * account when the CPU goes back to idle and evaluates the timer + * wheel for the next timer event. + */ +static inline void wake_up_idle_cpu(int cpu) +{ + if (cpu == smp_processor_id()) + return; + + set_tsk_need_resched(cpu_rq(cpu)->idle); + smp_send_reschedule(cpu); +} + +static inline bool wake_up_full_nohz_cpu(int cpu) +{ + /* + * We just need the target to call irq_exit() and re-evaluate + * the next tick. The nohz full kick at least implies that. + * If needed we can still optimize that later with an + * empty IRQ. + */ + if (tick_nohz_full_cpu(cpu)) { + if (cpu != smp_processor_id() || + tick_nohz_tick_stopped()) + tick_nohz_full_kick_cpu(cpu); + return true; + } + + return false; +} + +void wake_up_nohz_cpu(int cpu) +{ + if (cpu_online(cpu) && !wake_up_full_nohz_cpu(cpu)) + wake_up_idle_cpu(cpu); +} + +#endif /* CONFIG_NO_HZ_COMMON */ +#endif /* CONFIG_SMP */ + +static inline void check_preempt_curr(struct rq *rq) +{ + if (rq_first_bmq_task(rq) != rq->curr) + resched_curr(rq); +} + +#ifdef CONFIG_SCHED_HRTICK +/* + * Use HR-timers to deliver accurate preemption points. + */ + +static void hrtick_clear(struct rq *rq) +{ + if (hrtimer_active(&rq->hrtick_timer)) + hrtimer_cancel(&rq->hrtick_timer); +} + +/* + * High-resolution timer tick. + * Runs from hardirq context with interrupts disabled. 
+ */ +static enum hrtimer_restart hrtick(struct hrtimer *timer) +{ + struct rq *rq = container_of(timer, struct rq, hrtick_timer); + struct task_struct *p; + + WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); + + raw_spin_lock(&rq->lock); + p = rq->curr; + p->time_slice = 0; + resched_curr(rq); + raw_spin_unlock(&rq->lock); + + return HRTIMER_NORESTART; +} + +/* + * Use hrtick when: + * - enabled by features + * - hrtimer is actually high res + */ +static inline int hrtick_enabled(struct rq *rq) +{ + /** + * BMQ doesn't support sched_feat yet + if (!sched_feat(HRTICK)) + return 0; + */ + if (!cpu_active(cpu_of(rq))) + return 0; + return hrtimer_is_hres_active(&rq->hrtick_timer); +} + +#ifdef CONFIG_SMP + +static void __hrtick_restart(struct rq *rq) +{ + struct hrtimer *timer = &rq->hrtick_timer; + + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); +} + +/* + * called from hardirq (IPI) context + */ +static void __hrtick_start(void *arg) +{ + struct rq *rq = arg; + + raw_spin_lock(&rq->lock); + __hrtick_restart(rq); + rq->hrtick_csd_pending = 0; + raw_spin_unlock(&rq->lock); +} + +/* + * Called to set the hrtick timer state. + * + * called with rq->lock held and irqs disabled + */ +void hrtick_start(struct rq *rq, u64 delay) +{ + struct hrtimer *timer = &rq->hrtick_timer; + ktime_t time; + s64 delta; + + /* + * Don't schedule slices shorter than 10000ns, that just + * doesn't make sense and can cause timer DoS. + */ + delta = max_t(s64, delay, 10000LL); + time = ktime_add_ns(timer->base->get_time(), delta); + + hrtimer_set_expires(timer, time); + + if (rq == this_rq()) { + __hrtick_restart(rq); + } else if (!rq->hrtick_csd_pending) { + smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); + rq->hrtick_csd_pending = 1; + } +} + +#else +/* + * Called to set the hrtick timer state. + * + * called with rq->lock held and irqs disabled + */ +void hrtick_start(struct rq *rq, u64 delay) +{ + /* + * Don't schedule slices shorter than 10000ns, that just + * doesn't make sense. Rely on vruntime for fairness. + */ + delay = max_t(u64, delay, 10000LL); + hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), + HRTIMER_MODE_REL_PINNED_HARD); +} +#endif /* CONFIG_SMP */ + +static void hrtick_rq_init(struct rq *rq) +{ +#ifdef CONFIG_SMP + rq->hrtick_csd_pending = 0; + + rq->hrtick_csd.flags = 0; + rq->hrtick_csd.func = __hrtick_start; + rq->hrtick_csd.info = rq; +#endif + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); + rq->hrtick_timer.function = hrtick; +} +#else /* CONFIG_SCHED_HRTICK */ +static inline int hrtick_enabled(struct rq *rq) +{ + return 0; +} + +static inline void hrtick_clear(struct rq *rq) +{ +} + +static inline void hrtick_rq_init(struct rq *rq) +{ +} +#endif /* CONFIG_SCHED_HRTICK */ + +static inline int normal_prio(struct task_struct *p) +{ + if (task_has_rt_policy(p)) + return MAX_RT_PRIO - 1 - p->rt_priority; + + return p->static_prio + MAX_PRIORITY_ADJ; +} + +/* + * Calculate the current priority, i.e. the priority + * taken into account by the scheduler. This value might + * be boosted by RT tasks as it will be RT if the task got + * RT-boosted. If not then it returns p->normal_prio. + */ +static int effective_prio(struct task_struct *p) +{ + p->normal_prio = normal_prio(p); + /* + * If we are RT tasks or we were boosted to RT priority, + * keep the priority unchanged. 
Otherwise, update priority + * to the normal priority: + */ + if (!rt_prio(p->prio)) + return p->normal_prio; + return p->prio; +} + +/* + * activate_task - move a task to the runqueue. + * + * Context: rq->lock + */ +static void activate_task(struct task_struct *p, struct rq *rq) +{ + if (task_contributes_to_load(p)) + rq->nr_uninterruptible--; + enqueue_task(p, rq, ENQUEUE_WAKEUP); + p->on_rq = TASK_ON_RQ_QUEUED; + cpufreq_update_util(rq, 0); +} + +/* + * deactivate_task - remove a task from the runqueue. + * + * Context: rq->lock + */ +static inline void deactivate_task(struct task_struct *p, struct rq *rq) +{ + if (task_contributes_to_load(p)) + rq->nr_uninterruptible++; + dequeue_task(p, rq, DEQUEUE_SLEEP); + p->on_rq = 0; + cpufreq_update_util(rq, 0); +} + +static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) +{ +#ifdef CONFIG_SMP + /* + * After ->cpu is set up to a new value, task_access_lock(p, ...) can be + * successfully executed on another CPU. We must ensure that updates of + * per-task data have been completed by this moment. + */ + smp_wmb(); + +#ifdef CONFIG_THREAD_INFO_IN_TASK + WRITE_ONCE(p->cpu, cpu); +#else + WRITE_ONCE(task_thread_info(p)->cpu, cpu); +#endif +#endif +} + +#ifdef CONFIG_SMP +void set_task_cpu(struct task_struct *p, unsigned int new_cpu) +{ +#ifdef CONFIG_SCHED_DEBUG + /* + * We should never call set_task_cpu() on a blocked task, + * ttwu() will sort out the placement. + */ + WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && + !p->on_rq); +#ifdef CONFIG_LOCKDEP + /* + * The caller should hold either p->pi_lock or rq->lock, when changing + * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. + * + * sched_move_task() holds both and thus holding either pins the cgroup, + * see task_group(). + */ + WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || + lockdep_is_held(&task_rq(p)->lock))); +#endif + /* + * Clearly, migrating tasks to offline CPUs is a fairly daft thing. + */ + WARN_ON_ONCE(!cpu_online(new_cpu)); +#endif + if (task_cpu(p) == new_cpu) + return; + trace_sched_migrate_task(p, new_cpu); + rseq_migrate(p); + perf_event_task_migrate(p); + + __set_task_cpu(p, new_cpu); +} + +static inline bool is_per_cpu_kthread(struct task_struct *p) +{ + return ((p->flags & PF_KTHREAD) && (1 == p->nr_cpus_allowed)); +} + +/* + * Per-CPU kthreads are allowed to run on !active && online CPUs, see + * __set_cpus_allowed_ptr() and select_fallback_rq(). + */ +static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +{ + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + + if (is_per_cpu_kthread(p)) + return cpu_online(cpu); + + return cpu_active(cpu); +} + +/* + * This is how migration works: + * + * 1) we invoke migration_cpu_stop() on the target CPU using + * stop_one_cpu(). + * 2) stopper starts to run (implicitly forcing the migrated thread + * off the CPU) + * 3) it checks whether the migrated task is still in the wrong runqueue. + * 4) if it's in the wrong runqueue then the migration thread removes + * it and puts it into the right queue. + * 5) stopper completes and stop_one_cpu() returns and the migration + * is done. + */ + +/* + * move_queued_task - move a queued task to new rq. + * + * Returns (locked) new rq. Old rq's lock is released. 
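
normal_prio() and effective_prio() above, together with task_sched_prio() earlier in the file, map a task to its queue index. A worked example, assuming the usual MAX_RT_PRIO == 100 and NICE_TO_PRIO(0) == 120, and taking MAX_PRIORITY_ADJ == 4 from this series' bmq_sched.h as an assumption:

#include <stdio.h>

#define MAX_RT_PRIO		100	/* as in include/linux/sched/prio.h */
#define MAX_PRIORITY_ADJ	4	/* assumed from bmq_sched.h */
#define NICE_TO_PRIO(n)		(120 + (n))

static int task_sched_prio(int prio, int boost_prio)
{
	return prio < MAX_RT_PRIO ? 0 : prio - MAX_RT_PRIO + boost_prio + 1;
}

int main(void)
{
	int prio = NICE_TO_PRIO(0) + MAX_PRIORITY_ADJ;	/* normal_prio(): 124 */

	printf("RT task:         idx %d\n", task_sched_prio(50, 0));	/* 0 */
	printf("fully boosted:   idx %d\n", task_sched_prio(prio, -4));	/* 21 */
	printf("neutral:         idx %d\n", task_sched_prio(prio, 0));	/* 25 */
	printf("fully deboosted: idx %d\n", task_sched_prio(prio, 4));	/* 29 */
	return 0;
}
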
+ */ +static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int + new_cpu) +{ + lockdep_assert_held(&rq->lock); + + WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); + dequeue_task(p, rq, 0); + set_task_cpu(p, new_cpu); + raw_spin_unlock(&rq->lock); + + rq = cpu_rq(new_cpu); + + raw_spin_lock(&rq->lock); + BUG_ON(task_cpu(p) != new_cpu); + enqueue_task(p, rq, 0); + p->on_rq = TASK_ON_RQ_QUEUED; + check_preempt_curr(rq); + + return rq; +} + +struct migration_arg { + struct task_struct *task; + int dest_cpu; +}; + +/* + * Move (not current) task off this CPU, onto the destination CPU. We're doing + * this because either it can't run here any more (set_cpus_allowed() + * away from this CPU, or CPU going down), or because we're + * attempting to rebalance this task on exec (sched_exec). + * + * So we race with normal scheduler movements, but that's OK, as long + * as the task is no longer on this CPU. + */ +static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int + dest_cpu) +{ + /* Affinity changed (again). */ + if (!is_cpu_allowed(p, dest_cpu)) + return rq; + + update_rq_clock(rq); + return move_queued_task(rq, p, dest_cpu); +} + +/* + * migration_cpu_stop - this will be executed by a highprio stopper thread + * and performs thread migration by bumping thread off CPU then + * 'pushing' onto another runqueue. + */ +static int migration_cpu_stop(void *data) +{ + struct migration_arg *arg = data; + struct task_struct *p = arg->task; + struct rq *rq = this_rq(); + + /* + * The original target CPU might have gone down and we might + * be on another CPU but it doesn't matter. + */ + local_irq_disable(); + + raw_spin_lock(&p->pi_lock); + raw_spin_lock(&rq->lock); + /* + * If task_rq(p) != rq, it cannot be migrated here, because we're + * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because + * we're holding p->pi_lock. + */ + if (task_rq(p) == rq && task_on_rq_queued(p)) + rq = __migrate_task(rq, p, arg->dest_cpu); + raw_spin_unlock(&rq->lock); + raw_spin_unlock(&p->pi_lock); + + local_irq_enable(); + return 0; +} + +static inline void +set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) +{ + cpumask_copy(&p->cpus_mask, new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); +} + +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +{ + set_cpus_allowed_common(p, new_mask); +} +#endif + +/** + * task_curr - is this task currently executing on a CPU? + * @p: the task in question. + * + * Return: 1 if the task is currently executing. 0 otherwise. + */ +inline int task_curr(const struct task_struct *p) +{ + return cpu_curr(task_cpu(p)) == p; +} + +#ifdef CONFIG_SMP +/* + * wait_task_inactive - wait for a thread to unschedule. + * + * If @match_state is nonzero, it's the @p->state value just checked and + * not expected to change. If it changes, i.e. @p might have woken up, + * then return zero. When we succeed in waiting for @p to be off its CPU, + * we return a positive number (its total switch count). If a second call + * a short while later returns the same number, the caller can be sure that + * @p has remained unscheduled the whole time. + * + * The caller must ensure that the task *will* unschedule sometime soon, + * else this function might spin for a *long* time. This function can't + * be called with interrupts off, or it may introduce deadlock with + * smp_call_function() if an IPI is sent by the same process we are + * waiting to become inactive. 
+ */ +unsigned long wait_task_inactive(struct task_struct *p, long match_state) +{ + unsigned long flags; + bool running, on_rq; + unsigned long ncsw; + struct rq *rq; + raw_spinlock_t *lock; + + for (;;) { + rq = task_rq(p); + + /* + * If the task is actively running on another CPU + * still, just relax and busy-wait without holding + * any locks. + * + * NOTE! Since we don't hold any locks, it's not + * even sure that "rq" stays as the right runqueue! + * But we don't care, since this will return false + * if the runqueue has changed and p is actually now + * running somewhere else! + */ + while (task_running(p) && p == rq->curr) { + if (match_state && unlikely(p->state != match_state)) + return 0; + cpu_relax(); + } + + /* + * Ok, time to look more closely! We need the rq + * lock now, to be *sure*. If we're wrong, we'll + * just go back and repeat. + */ + task_access_lock_irqsave(p, &lock, &flags); + trace_sched_wait_task(p); + running = task_running(p); + on_rq = p->on_rq; + ncsw = 0; + if (!match_state || p->state == match_state) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_access_unlock_irqrestore(p, lock, &flags); + + /* + * If it changed from the expected state, bail out now. + */ + if (unlikely(!ncsw)) + break; + + /* + * Was it really running after all now that we + * checked with the proper locks actually held? + * + * Oops. Go back and try again.. + */ + if (unlikely(running)) { + cpu_relax(); + continue; + } + + /* + * It's not enough that it's not actively running, + * it must be off the runqueue _entirely_, and not + * preempted! + * + * So if it was still runnable (but just not actively + * running right now), it's preempted, and we should + * yield - it could be a while. + */ + if (unlikely(on_rq)) { + ktime_t to = NSEC_PER_SEC / HZ; + + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout(&to, HRTIMER_MODE_REL); + continue; + } + + /* + * Ahh, all good. It wasn't running, and it wasn't + * runnable, which means that it will never become + * running in the future either. We're all done! + */ + break; + } + + return ncsw; +} + +/*** + * kick_process - kick a running thread to enter/exit the kernel + * @p: the to-be-kicked thread + * + * Cause a process which is running on another CPU to enter + * kernel-mode, without any delay. (to get signals handled.) + * + * NOTE: this function doesn't have to take the runqueue lock, + * because all it wants to ensure is that the remote task enters + * the kernel. If the IPI races and the task has been migrated + * to another CPU then no harm is done and the purpose has been + * achieved as well. + */ +void kick_process(struct task_struct *p) +{ + int cpu; + + preempt_disable(); + cpu = task_cpu(p); + if ((cpu != smp_processor_id()) && task_curr(p)) + smp_send_reschedule(cpu); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kick_process); + +/* + * ->cpus_ptr is protected by both rq->lock and p->pi_lock + * + * A few notes on cpu_active vs cpu_online: + * + * - cpu_active must be a subset of cpu_online + * + * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, + * see __set_cpus_allowed_ptr(). At this point the newly online + * CPU isn't yet part of the sched domains, and balancing will not + * see it. + * + * - on cpu-down we clear cpu_active() to mask the sched domains and + * avoid the load balancer to place new tasks on the to be removed + * CPU. Existing tasks will remain running there and will be taken + * off. + * + * This means that fallback selection must not select !active CPUs. 
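
wait_task_inactive() above returns p->nvcsw | LONG_MIN, so success is never confused with the 0 that signals a state change, while two snapshots still compare equal when the task never rescheduled in between:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	unsigned long nvcsw = 0;		/* even a zero switch count... */
	unsigned long ncsw = nvcsw | LONG_MIN;	/* ...yields a non-zero cookie */

	printf("ncsw = %#lx\n", ncsw);
	printf("unscheduled the whole time? %s\n",
	       (nvcsw | LONG_MIN) == ncsw ? "yes" : "no");
	return 0;
}
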
+ * And can assume that any active CPU must be online. Conversely + * select_task_rq() below may allow selection of !active CPUs in order + * to satisfy the above rules. + */ +static int select_fallback_rq(int cpu, struct task_struct *p) +{ + int nid = cpu_to_node(cpu); + const struct cpumask *nodemask = NULL; + enum { cpuset, possible, fail } state = cpuset; + int dest_cpu; + + /* + * If the node that the CPU is on has been offlined, cpu_to_node() + * will return -1. There is no CPU on the node, and we should + * select the CPU on the other node. + */ + if (nid != -1) { + nodemask = cpumask_of_node(nid); + + /* Look for allowed, online CPU in same node. */ + for_each_cpu(dest_cpu, nodemask) { + if (!cpu_active(dest_cpu)) + continue; + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) + return dest_cpu; + } + } + + for (;;) { + /* Any allowed, online CPU? */ + for_each_cpu(dest_cpu, p->cpus_ptr) { + if (!is_cpu_allowed(p, dest_cpu)) + continue; + goto out; + } + + /* No more Mr. Nice Guy. */ + switch (state) { + case cpuset: + if (IS_ENABLED(CONFIG_CPUSETS)) { + cpuset_cpus_allowed_fallback(p); + state = possible; + break; + } + /* Fall-through */ + case possible: + do_set_cpus_allowed(p, cpu_possible_mask); + state = fail; + break; + + case fail: + BUG(); + break; + } + } + +out: + if (state != cpuset) { + /* + * Don't tell them about moving exiting tasks or + * kernel threads (both mm NULL), since they never + * leave kernel. + */ + if (p->mm && printk_ratelimit()) { + printk_deferred("process %d (%s) no longer affine to cpu%d\n", + task_pid_nr(p), p->comm, cpu); + } + } + + return dest_cpu; +} + +static inline int select_task_rq(struct task_struct *p) +{ + cpumask_t chk_mask, tmp; + + if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_online_mask))) + return select_fallback_rq(task_cpu(p), p); + + if ( +#ifdef CONFIG_SCHED_SMT + cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || +#endif + cpumask_and(&tmp, &chk_mask, &sched_rq_watermark[IDLE_WM]) || + cpumask_and(&tmp, &chk_mask, + &sched_rq_watermark[task_sched_prio(p) + 1])) + return best_mask_cpu(task_cpu(p), &tmp); + + return best_mask_cpu(task_cpu(p), &chk_mask); +} + +void sched_set_stop_task(int cpu, struct task_struct *stop) +{ + struct sched_param stop_param = { .sched_priority = STOP_PRIO }; + struct sched_param start_param = { .sched_priority = 0 }; + struct task_struct *old_stop = cpu_rq(cpu)->stop; + + if (stop) { + /* + * Make it appear like a SCHED_FIFO task, its something + * userspace knows about and won't get confused about. + * + * Also, it will make PI more or less work without too + * much confusion -- but then, stop work should not + * rely on PI working anyway. + */ + sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); + } + + cpu_rq(cpu)->stop = stop; + + if (old_stop) { + /* + * Reset it back to a normal scheduling policy so that + * it can die in pieces. + */ + sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); + } +} + +/* + * Change a given task's CPU affinity. Migrate the thread to a + * proper CPU and schedule it away if the CPU it's executing on + * is removed from the allowed bitmask. + * + * NOTE: the caller must have a valid reference to the task, the + * task must not exit() & deallocate itself prematurely. The + * call is not atomic; no spinlocks may be held. 
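
select_task_rq() above walks progressively weaker candidate masks and takes the first non-empty intersection with the allowed set: SMT-idle groups first, then fully idle CPUs, then CPUs whose run-queue watermark sits above the task's priority. Modelled with plain bitmasks standing in for cpumask_t:

#include <stdio.h>

static unsigned long pick(unsigned long allowed,
			  const unsigned long *prefs, int n)
{
	for (int i = 0; i < n; i++)
		if (allowed & prefs[i])		/* first non-empty intersection */
			return allowed & prefs[i];
	return allowed;				/* fall back to anything allowed */
}

int main(void)
{
	unsigned long allowed = 0xf0;
	unsigned long prefs[] = {
		0x03,	/* SMT-idle groups */
		0x10,	/* fully idle CPUs */
		0xff,	/* watermark above task priority */
	};

	printf("candidates %#lx\n", pick(allowed, prefs, 3));	/* 0x10 */
	return 0;
}
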
+ */ +static int __set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, bool check) +{ + const struct cpumask *cpu_valid_mask = cpu_active_mask; + int dest_cpu; + unsigned long flags; + struct rq *rq; + raw_spinlock_t *lock; + int ret = 0; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + rq = __task_access_lock(p, &lock); + + if (p->flags & PF_KTHREAD) { + /* + * Kernel threads are allowed on online && !active CPUs + */ + cpu_valid_mask = cpu_online_mask; + } + + /* + * Must re-check here, to close a race against __kthread_bind(), + * sched_setaffinity() is not guaranteed to observe the flag. + */ + if (check && (p->flags & PF_NO_SETAFFINITY)) { + ret = -EINVAL; + goto out; + } + + if (cpumask_equal(p->cpus_ptr, new_mask)) + goto out; + + dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); + if (dest_cpu >= nr_cpu_ids) { + ret = -EINVAL; + goto out; + } + + do_set_cpus_allowed(p, new_mask); + + if (p->flags & PF_KTHREAD) { + /* + * For kernel threads that do indeed end up on online && + * !active we want to ensure they are strict per-CPU threads. + */ + WARN_ON(cpumask_intersects(new_mask, cpu_online_mask) && + !cpumask_intersects(new_mask, cpu_active_mask) && + p->nr_cpus_allowed != 1); + } + + /* Can the task run on the task's current CPU? If so, we're done */ + if (cpumask_test_cpu(task_cpu(p), new_mask)) + goto out; + + if (task_running(p) || p->state == TASK_WAKING) { + struct migration_arg arg = { p, dest_cpu }; + + /* Need help from migration thread: drop lock and wait. */ + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); + return 0; + } + if (task_on_rq_queued(p)) { + /* + * OK, since we're going to drop the lock immediately + * afterwards anyway. + */ + update_rq_clock(rq); + rq = move_queued_task(rq, p, dest_cpu); + lock = &rq->lock; + } + +out: + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + return ret; +} + +int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) +{ + return __set_cpus_allowed_ptr(p, new_mask, false); +} +EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + +#else /* CONFIG_SMP */ + +static inline int select_task_rq(struct task_struct *p) +{ + return 0; +} + +static inline int +__set_cpus_allowed_ptr(struct task_struct *p, + const struct cpumask *new_mask, bool check) +{ + return set_cpus_allowed_ptr(p, new_mask); +} + +#endif /* CONFIG_SMP */ + +static void +ttwu_stat(struct task_struct *p, int cpu, int wake_flags) +{ + struct rq *rq; + + if (!schedstat_enabled()) + return; + + rq= this_rq(); + +#ifdef CONFIG_SMP + if (cpu == rq->cpu) + __schedstat_inc(rq->ttwu_local); + else { + /** BMQ ToDo: + * How to do ttwu_wake_remote + */ + } +#endif /* CONFIG_SMP */ + + __schedstat_inc(rq->ttwu_count); +} + +/* + * Mark the task runnable and perform wakeup-preemption. 
+ */ +static inline void +ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) +{ + p->state = TASK_RUNNING; + trace_sched_wakeup(p); +} + +static inline void +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) +{ +#ifdef CONFIG_SMP + if (p->sched_contributes_to_load) + rq->nr_uninterruptible--; +#endif + + activate_task(p, rq); + ttwu_do_wakeup(rq, p, 0); +} + +static int ttwu_remote(struct task_struct *p, int wake_flags) +{ + struct rq *rq; + raw_spinlock_t *lock; + int ret = 0; + + rq = __task_access_lock(p, &lock); + if (task_on_rq_queued(p)) { + ttwu_do_wakeup(rq, p, wake_flags); + ret = 1; + } + __task_access_unlock(p, lock); + + return ret; +} + +#ifdef CONFIG_SMP +void scheduler_ipi(void) +{ + /* + * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting + * TIF_NEED_RESCHED remotely (for the first time) will also send + * this IPI. + */ + preempt_fold_need_resched(); + + if (!idle_cpu(smp_processor_id()) || need_resched()) + return; + + irq_enter(); + irq_exit(); +} + +void wake_up_if_idle(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + rcu_read_lock(); + + if (!is_idle_task(rcu_dereference(rq->curr))) + goto out; + + if (set_nr_if_polling(rq->idle)) { + trace_sched_wake_idle_without_ipi(cpu); + } else { + raw_spin_lock_irqsave(&rq->lock, flags); + if (is_idle_task(rq->curr)) + smp_send_reschedule(cpu); + /* Else CPU is not idle, do nothing here */ + raw_spin_unlock_irqrestore(&rq->lock, flags); + } + +out: + rcu_read_unlock(); +} + +bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); +} +#endif /* CONFIG_SMP */ + +static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) +{ + struct rq *rq = cpu_rq(cpu); + + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + ttwu_do_activate(rq, p, wake_flags); + check_preempt_curr(rq); + raw_spin_unlock(&rq->lock); +} + +/* + * Notes on Program-Order guarantees on SMP systems. + * + * MIGRATION + * + * The basic program-order guarantee on SMP systems is that when a task [t] + * migrates, all its activity on its old CPU [c0] happens-before any subsequent + * execution on its new CPU [c1]. + * + * For migration (of runnable tasks) this is provided by the following means: + * + * A) UNLOCK of the rq(c0)->lock scheduling out task t + * B) migration for t is required to synchronize *both* rq(c0)->lock and + * rq(c1)->lock (if not at the same time, then in that order). + * C) LOCK of the rq(c1)->lock scheduling in task + * + * Transitivity guarantees that B happens after A and C after B. + * Note: we only require RCpc transitivity. + * Note: the CPU doing B need not be c0 or c1 + * + * Example: + * + * CPU0 CPU1 CPU2 + * + * LOCK rq(0)->lock + * sched-out X + * sched-in Y + * UNLOCK rq(0)->lock + * + * LOCK rq(0)->lock // orders against CPU0 + * dequeue X + * UNLOCK rq(0)->lock + * + * LOCK rq(1)->lock + * enqueue X + * UNLOCK rq(1)->lock + * + * LOCK rq(1)->lock // orders against CPU2 + * sched-out Z + * sched-in X + * UNLOCK rq(1)->lock + * + * + * BLOCKING -- aka. SLEEP + WAKEUP + * + * For blocking we (obviously) need to provide the same guarantee as for + * migration. However the means are completely different as there is no lock + * chain to provide order. 
Instead we do: + * + * 1) smp_store_release(X->on_cpu, 0) + * 2) smp_cond_load_acquire(!X->on_cpu) + * + * Example: + * + * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) + * + * LOCK rq(0)->lock LOCK X->pi_lock + * dequeue X + * sched-out X + * smp_store_release(X->on_cpu, 0); + * + * smp_cond_load_acquire(&X->on_cpu, !VAL); + * X->state = WAKING + * set_task_cpu(X,2) + * + * LOCK rq(2)->lock + * enqueue X + * X->state = RUNNING + * UNLOCK rq(2)->lock + * + * LOCK rq(2)->lock // orders against CPU1 + * sched-out Z + * sched-in X + * UNLOCK rq(2)->lock + * + * UNLOCK X->pi_lock + * UNLOCK rq(0)->lock + * + * + * However; for wakeups there is a second guarantee we must provide, namely we + * must observe the state that lead to our wakeup. That is, not only must our + * task observe its own prior state, it must also observe the stores prior to + * its wakeup. + * + * This means that any means of doing remote wakeups must order the CPU doing + * the wakeup against the CPU the task is going to end up running on. This, + * however, is already required for the regular Program-Order guarantee above, + * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). + * + */ + +/*** + * try_to_wake_up - wake up a thread + * @p: the thread to be awakened + * @state: the mask of task states that can be woken + * @wake_flags: wake modifier flags (WF_*) + * + * Put it on the run-queue if it's not already there. The "current" + * thread is always on the run-queue (except when the actual + * re-schedule is in progress), and as such you're allowed to do + * the simpler "current->state = TASK_RUNNING" to mark yourself + * runnable without the overhead of this. + * + * Return: %true if @p was woken up, %false if it was already running. + * or @state didn't match @p's state. + */ +static int try_to_wake_up(struct task_struct *p, unsigned int state, + int wake_flags) +{ + unsigned long flags; + int cpu, success = 0; + + preempt_disable(); + if (p == current) { + /* + * We're waking current, this means 'p->on_rq' and 'task_cpu(p) + * == smp_processor_id()'. Together this means we can special + * case the whole 'p->on_rq && ttwu_remote()' case below + * without taking any locks. + * + * In particular: + * - we rely on Program-Order guarantees for all the ordering, + * - we're serialized against set_special_state() by virtue of + * it disabling IRQs (this allows not taking ->pi_lock). + */ + if (!(p->state & state)) + goto out; + + success = 1; + cpu = task_cpu(p); + trace_sched_waking(p); + p->state = TASK_RUNNING; + trace_sched_wakeup(p); + goto out; + } + + /* + * If we are going to wake up a thread waiting for CONDITION we + * need to ensure that CONDITION=1 done by the caller can not be + * reordered with p->state check below. This pairs with mb() in + * set_current_state() the waiting thread does. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); + if (!(p->state & state)) + goto unlock; + + trace_sched_waking(p); + + /* We're going to change ->state: */ + success = 1; + cpu = task_cpu(p); + + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck + * in smp_cond_load_acquire() below. 
+ * + * sched_ttwu_pending() try_to_wake_up() + * STORE p->on_rq = 1 LOAD p->state + * UNLOCK rq->lock + * + * __schedule() (switch to task 'p') + * LOCK rq->lock smp_rmb(); + * smp_mb__after_spinlock(); + * UNLOCK rq->lock + * + * [task p] + * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). + */ + smp_rmb(); + if (p->on_rq && ttwu_remote(p, wake_flags)) + goto unlock; + +#ifdef CONFIG_SMP + /* + * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be + * possible to, falsely, observe p->on_cpu == 0. + * + * One must be running (->on_cpu == 1) in order to remove oneself + * from the runqueue. + * + * __schedule() (switch to task 'p') try_to_wake_up() + * STORE p->on_cpu = 1 LOAD p->on_rq + * UNLOCK rq->lock + * + * __schedule() (put 'p' to sleep) + * LOCK rq->lock smp_rmb(); + * smp_mb__after_spinlock(); + * STORE p->on_rq = 0 LOAD p->on_cpu + * + * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in + * __schedule(). See the comment for smp_mb__after_spinlock(). + */ + smp_rmb(); + + /* + * If the owning (remote) CPU is still in the middle of schedule() with + * this task as prev, wait until its done referencing the task. + * + * Pairs with the smp_store_release() in finish_task(). + * + * This ensures that tasks getting woken will be fully ordered against + * their previous state and preserve Program Order. + */ + smp_cond_load_acquire(&p->on_cpu, !VAL); + + p->sched_contributes_to_load = !!task_contributes_to_load(p); + p->state = TASK_WAKING; + + if (p->in_iowait) { + delayacct_blkio_end(p); + atomic_dec(&task_rq(p)->nr_iowait); + } + + if(cpu_rq(smp_processor_id())->clock - p->last_ran > sched_timeslice_ns) + boost_task(p); + + cpu = select_task_rq(p); + + if (cpu != task_cpu(p)) { + wake_flags |= WF_MIGRATED; + psi_ttwu_dequeue(p); + set_task_cpu(p, cpu); + } +#else /* CONFIG_SMP */ + if (p->in_iowait) { + delayacct_blkio_end(p); + atomic_dec(&task_rq(p)->nr_iowait); + } +#endif /* CONFIG_SMP */ + + ttwu_queue(p, cpu, wake_flags); +unlock: + raw_spin_unlock_irqrestore(&p->pi_lock, flags); +out: + if (success) + ttwu_stat(p, cpu, wake_flags); + preempt_enable(); + + return success; +} + +/** + * wake_up_process - Wake up a specific process + * @p: The process to be woken up. + * + * Attempt to wake up the nominated process and move it to the set of runnable + * processes. + * + * Return: 1 if the process was woken up, 0 if it was already running. + * + * This function executes a full memory barrier before accessing the task state. + */ +int wake_up_process(struct task_struct *p) +{ + return try_to_wake_up(p, TASK_NORMAL, 0); +} +EXPORT_SYMBOL(wake_up_process); + +int wake_up_state(struct task_struct *p, unsigned int state) +{ + return try_to_wake_up(p, state, 0); +} + +/* + * Perform scheduler related setup for a newly forked process p. + * p is forked by current. 
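
The smp_cond_load_acquire(&p->on_cpu, !VAL) in try_to_wake_up() above pairs with the smp_store_release() in finish_task(): everything the old CPU did before releasing ->on_cpu is visible to the waker after the acquire. A C11/pthreads analogue (compile with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int on_cpu = 1;
static int payload;

static void *old_cpu(void *arg)
{
	(void)arg;
	payload = 42;	/* work done before the task is descheduled */
	atomic_store_explicit(&on_cpu, 0, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, old_cpu, NULL);
	while (atomic_load_explicit(&on_cpu, memory_order_acquire))
		;	/* smp_cond_load_acquire(&p->on_cpu, !VAL) analogue */
	printf("payload %d\n", payload);	/* guaranteed to print 42 */
	pthread_join(t, NULL);
	return 0;
}
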
+ * + * __sched_fork() is basic setup used by init_idle() too: + */ +static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) +{ + p->on_rq = 0; + p->on_cpu = 0; + p->utime = 0; + p->stime = 0; + p->sched_time = 0; + +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); +#endif + +#ifdef CONFIG_COMPACTION + p->capture_control = NULL; +#endif +} + +/* + * fork()/clone()-time setup: + */ +int sched_fork(unsigned long clone_flags, struct task_struct *p) +{ + unsigned long flags; + int cpu = get_cpu(); + struct rq *rq = this_rq(); + + __sched_fork(clone_flags, p); + /* + * We mark the process as NEW here. This guarantees that + * nobody will actually run it, and a signal or other external + * event cannot wake it up and insert it on the runqueue either. + */ + p->state = TASK_NEW; + + /* + * Make sure we do not leak PI boosting priority to the child. + */ + p->prio = current->normal_prio; + + /* + * Revert to default priority/policy on fork if requested. + */ + if (unlikely(p->sched_reset_on_fork)) { + if (task_has_rt_policy(p)) { + p->policy = SCHED_NORMAL; + p->static_prio = NICE_TO_PRIO(0); + p->rt_priority = 0; + } else if (PRIO_TO_NICE(p->static_prio) < 0) + p->static_prio = NICE_TO_PRIO(0); + + p->prio = p->normal_prio = normal_prio(p); + + /* + * We don't need the reset flag anymore after the fork. It has + * fulfilled its duty: + */ + p->sched_reset_on_fork = 0; + } + + p->boost_prio = (p->boost_prio < 0) ? + p->boost_prio + MAX_PRIORITY_ADJ : MAX_PRIORITY_ADJ; + /* + * Share the timeslice between parent and child, thus the + * total amount of pending timeslices in the system doesn't change, + * resulting in more scheduling fairness. + */ + raw_spin_lock_irqsave(&rq->lock, flags); + rq->curr->time_slice /= 2; + p->time_slice = rq->curr->time_slice; +#ifdef CONFIG_SCHED_HRTICK + hrtick_start(rq, rq->curr->time_slice); +#endif + + if (p->time_slice < RESCHED_NS) { + p->time_slice = sched_timeslice_ns; + resched_curr(rq); + } + raw_spin_unlock_irqrestore(&rq->lock, flags); + + /* + * The child is not yet in the pid-hash so no cgroup attach races, + * and the cgroup is pinned to this child due to cgroup_fork() + * is ran before sched_fork(). + * + * Silence PROVE_RCU. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + /* + * We're setting the CPU for the first time, we don't migrate, + * so use __set_task_cpu(). + */ + __set_task_cpu(p, cpu); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + +#ifdef CONFIG_SCHED_INFO + if (unlikely(sched_info_on())) + memset(&p->sched_info, 0, sizeof(p->sched_info)); +#endif + init_task_preempt_count(p); + + put_cpu(); + return 0; +} + +#ifdef CONFIG_SCHEDSTATS + +DEFINE_STATIC_KEY_FALSE(sched_schedstats); +static bool __initdata __sched_schedstats = false; + +static void set_schedstats(bool enabled) +{ + if (enabled) + static_branch_enable(&sched_schedstats); + else + static_branch_disable(&sched_schedstats); +} + +void force_schedstat_enabled(void) +{ + if (!schedstat_enabled()) { + pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); + static_branch_enable(&sched_schedstats); + } +} + +static int __init setup_schedstats(char *str) +{ + int ret = 0; + if (!str) + goto out; + + /* + * This code is called before jump labels have been set up, so we can't + * change the static branch directly just yet. Instead set a temporary + * variable so init_schedstats() can do it later. 
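
sched_fork() above halves the parent's remaining slice and hands the other half to the child, keeping the total outstanding slice constant; a child left with less than RESCHED_NS gets a fresh slice and the parent is marked for rescheduling. The arithmetic in isolation:

#include <stdio.h>

#define RESCHED_NS (100 * 1000)

static unsigned long long sched_timeslice_ns = 4ULL * 1000 * 1000;

int main(void)
{
	unsigned long long parent = 150 * 1000;	/* 150us left at fork */
	unsigned long long child;

	parent /= 2;
	child = parent;				/* 75us each */
	if (child < RESCHED_NS)
		child = sched_timeslice_ns;	/* child: fresh 4ms slice */
	printf("parent %llu ns, child %llu ns\n", parent, child);
	return 0;
}
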
+ */ + if (!strcmp(str, "enable")) { + __sched_schedstats = true; + ret = 1; + } else if (!strcmp(str, "disable")) { + __sched_schedstats = false; + ret = 1; + } +out: + if (!ret) + pr_warn("Unable to parse schedstats=\n"); + + return ret; +} +__setup("schedstats=", setup_schedstats); + +static void __init init_schedstats(void) +{ + set_schedstats(__sched_schedstats); +} + +#ifdef CONFIG_PROC_SYSCTL +int sysctl_schedstats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = static_branch_likely(&sched_schedstats); + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + if (write) + set_schedstats(state); + return err; +} +#endif /* CONFIG_PROC_SYSCTL */ +#else /* !CONFIG_SCHEDSTATS */ +static inline void init_schedstats(void) {} +#endif /* CONFIG_SCHEDSTATS */ + +/* + * wake_up_new_task - wake up a newly created task for the first time. + * + * This function will do some initial scheduler statistics housekeeping + * that must be done for every newly created context, then puts the task + * on the runqueue and wakes it. + */ +void wake_up_new_task(struct task_struct *p) +{ + unsigned long flags; + struct rq *rq; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + + p->state = TASK_RUNNING; + + rq = cpu_rq(select_task_rq(p)); +#ifdef CONFIG_SMP + /* + * Fork balancing, do it here and not earlier because: + * - cpus_ptr can change in the fork path + * - any previously selected CPU might disappear through hotplug + * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, + * as we're not fully set-up yet. + */ + __set_task_cpu(p, cpu_of(rq)); +#endif + + raw_spin_lock(&rq->lock); + + update_rq_clock(rq); + activate_task(p, rq); + trace_sched_wakeup_new(p); + check_preempt_curr(rq); + + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); +} + +#ifdef CONFIG_PREEMPT_NOTIFIERS + +static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); + +void preempt_notifier_inc(void) +{ + static_branch_inc(&preempt_notifier_key); +} +EXPORT_SYMBOL_GPL(preempt_notifier_inc); + +void preempt_notifier_dec(void) +{ + static_branch_dec(&preempt_notifier_key); +} +EXPORT_SYMBOL_GPL(preempt_notifier_dec); + +/** + * preempt_notifier_register - tell me when current is being preempted & rescheduled + * @notifier: notifier struct to register + */ +void preempt_notifier_register(struct preempt_notifier *notifier) +{ + if (!static_branch_unlikely(&preempt_notifier_key)) + WARN(1, "registering preempt_notifier while notifiers disabled\n"); + + hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); +} +EXPORT_SYMBOL_GPL(preempt_notifier_register); + +/** + * preempt_notifier_unregister - no longer interested in preemption notifications + * @notifier: notifier struct to unregister + * + * This is *not* safe to call from within a preemption notifier. 
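+ *
+ * (The reason is visible below: fire_sched_in/out_preempt_notifiers()
+ * walk current->preempt_notifiers with hlist_for_each_entry(), so
+ * unlinking an entry from inside a callback would corrupt that walk.)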
+ */ +void preempt_notifier_unregister(struct preempt_notifier *notifier) +{ + hlist_del(¬ifier->link); +} +EXPORT_SYMBOL_GPL(preempt_notifier_unregister); + +static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ + struct preempt_notifier *notifier; + + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) + notifier->ops->sched_in(notifier, raw_smp_processor_id()); +} + +static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ + if (static_branch_unlikely(&preempt_notifier_key)) + __fire_sched_in_preempt_notifiers(curr); +} + +static void +__fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ + struct preempt_notifier *notifier; + + hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) + notifier->ops->sched_out(notifier, next); +} + +static __always_inline void +fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ + if (static_branch_unlikely(&preempt_notifier_key)) + __fire_sched_out_preempt_notifiers(curr, next); +} + +#else /* !CONFIG_PREEMPT_NOTIFIERS */ + +static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ +} + +static inline void +fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ +} + +#endif /* CONFIG_PREEMPT_NOTIFIERS */ + +static inline void prepare_task(struct task_struct *next) +{ + /* + * Claim the task as running, we do this before switching to it + * such that any running task will have this set. + */ + next->on_cpu = 1; +} + +static inline void finish_task(struct task_struct *prev) +{ +#ifdef CONFIG_SMP + /* + * After ->on_cpu is cleared, the task can be moved to a different CPU. + * We must ensure this doesn't happen until the switch is completely + * finished. + * + * In particular, the load of prev->state in finish_task_switch() must + * happen before this. + * + * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). + */ + smp_store_release(&prev->on_cpu, 0); +#else + prev->on_cpu = 0; +#endif +} + +static inline void +prepare_lock_switch(struct rq *rq, struct task_struct *next) +{ + /* + * Since the runqueue lock will be released by the next + * task (which is an invalid locking op but in the case + * of the scheduler it's an obvious special-case), so we + * do an early lockdep release here: + */ + spin_release(&rq->lock.dep_map, _THIS_IP_); +#ifdef CONFIG_DEBUG_SPINLOCK + /* this is a valid case when another task releases the spinlock */ + rq->lock.owner = next; +#endif +} + +static inline void finish_lock_switch(struct rq *rq) +{ + /* + * If we are tracking spinlock dependencies then we have to + * fix up the runqueue lock - which gets 'carried over' from + * prev into current: + */ + spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); + raw_spin_unlock_irq(&rq->lock); +} + +/** + * prepare_task_switch - prepare to switch tasks + * @rq: the runqueue preparing to switch + * @next: the task we are going to switch to. + * + * This is called with the rq lock held and interrupts off. It must + * be paired with a subsequent finish_task_switch after the context + * switch. + * + * prepare_task_switch sets up locking and calls architecture specific + * hooks. 
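+ *
+ * A minimal sketch of the pairing, as used by context_switch() below
+ * (illustrative only):
+ *
+ *	prepare_task_switch(rq, prev, next);
+ *	switch_to(prev, next, prev);
+ *	rq = finish_task_switch(prev);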
+ */
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+		    struct task_struct *next)
+{
+	kcov_prepare_switch(prev);
+	sched_info_switch(rq, prev, next);
+	perf_event_task_sched_out(prev, next);
+	rseq_preempt(prev);
+	fire_sched_out_preempt_notifiers(prev, next);
+	prepare_task(next);
+	prepare_arch_switch(next);
+}
+
+/**
+ * finish_task_switch - clean up after a task-switch
+ * @rq: runqueue associated with task-switch
+ * @prev: the thread we just switched away from.
+ *
+ * finish_task_switch must be called after the context switch, paired
+ * with a prepare_task_switch call before the context switch.
+ * finish_task_switch will reconcile locking set up by prepare_task_switch,
+ * and do any other architecture-specific cleanup actions.
+ *
+ * Note that we may have delayed dropping an mm in context_switch(). If
+ * so, we finish that here outside of the runqueue lock. (Doing it
+ * with the lock held can cause deadlocks; see schedule() for
+ * details.)
+ *
+ * The context switch has flipped the stack from under us and restored the
+ * local variables which were saved when this task called schedule() in the
+ * past. prev == current is still correct but we need to recalculate this_rq
+ * because prev may have moved to another CPU.
+ */
+static struct rq *finish_task_switch(struct task_struct *prev)
+	__releases(rq->lock)
+{
+	struct rq *rq = this_rq();
+	struct mm_struct *mm = rq->prev_mm;
+	long prev_state;
+
+	/*
+	 * The previous task will have left us with a preempt_count of 2
+	 * because it left us after:
+	 *
+	 *	schedule()
+	 *	  preempt_disable();			// 1
+	 *	    __schedule()
+	 *	      raw_spin_lock_irq(&rq->lock)	// 2
+	 *
+	 * Also, see FORK_PREEMPT_COUNT.
+	 */
+	if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET,
+		      "corrupted preempt_count: %s/%d/0x%x\n",
+		      current->comm, current->pid, preempt_count()))
+		preempt_count_set(FORK_PREEMPT_COUNT);
+
+	rq->prev_mm = NULL;
+
+	/*
+	 * A task struct has one reference for the use as "current".
+	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
+	 * schedule one last time. The schedule call will never return, and
+	 * the scheduled task must drop that reference.
+	 *
+	 * We must observe prev->state before clearing prev->on_cpu (in
+	 * finish_task), otherwise a concurrent wakeup can get prev
+	 * running on another CPU and we could race with its RUNNING -> DEAD
+	 * transition, resulting in a double drop.
+	 */
+	prev_state = prev->state;
+	vtime_task_switch(prev);
+	perf_event_task_sched_in(prev, current);
+	finish_task(prev);
+	finish_lock_switch(rq);
+	finish_arch_post_lock_switch();
+	kcov_finish_switch(current);
+
+	fire_sched_in_preempt_notifiers(current);
+	/*
+	 * When switching through a kernel thread, the loop in
+	 * membarrier_{private,global}_expedited() may have observed that
+	 * kernel thread and not issued an IPI. It is therefore possible to
+	 * schedule between user->kernel->user threads without passing through
+	 * switch_mm(). Membarrier requires a barrier after storing to
+	 * rq->curr, before returning to userspace, so provide them here:
+	 *
+	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
+	 *   provided by mmdrop(),
+	 * - a sync_core for SYNC_CORE.
+	 */
+	if (mm) {
+		membarrier_mm_sync_core_before_usermode(mm);
+		mmdrop(mm);
+	}
+	if (unlikely(prev_state == TASK_DEAD)) {
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
+
+		/* Task is done with its stack.
*/ + put_task_stack(prev); + + put_task_struct_rcu_user(prev); + } + + tick_nohz_task_switch(); + return rq; +} + +/** + * schedule_tail - first thing a freshly forked thread must call. + * @prev: the thread we just switched away from. + */ +asmlinkage __visible void schedule_tail(struct task_struct *prev) + __releases(rq->lock) +{ + struct rq *rq; + + /* + * New tasks start with FORK_PREEMPT_COUNT, see there and + * finish_task_switch() for details. + * + * finish_task_switch() will drop rq->lock() and lower preempt_count + * and the preempt_enable() will end up enabling preemption (on + * PREEMPT_COUNT kernels). + */ + + rq = finish_task_switch(prev); + preempt_enable(); + + if (current->set_child_tid) + put_user(task_pid_vnr(current), current->set_child_tid); + + calculate_sigpending(); +} + +/* + * context_switch - switch to the new MM and the new thread's register state. + */ +static __always_inline struct rq * +context_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next) +{ + prepare_task_switch(rq, prev, next); + + /* + * For paravirt, this is coupled with an exit in switch_to to + * combine the page table reload and the switch backend into + * one hypercall. + */ + arch_start_context_switch(prev); + + /* + * kernel -> kernel lazy + transfer active + * user -> kernel lazy + mmgrab() active + * + * kernel -> user switch + mmdrop() active + * user -> user switch + */ + if (!next->mm) { // to kernel + enter_lazy_tlb(prev->active_mm, next); + + next->active_mm = prev->active_mm; + if (prev->mm) // from user + mmgrab(prev->active_mm); + else + prev->active_mm = NULL; + } else { // to user + membarrier_switch_mm(rq, prev->active_mm, next->mm); + /* + * sys_membarrier() requires an smp_mb() between setting + * rq->curr / membarrier_switch_mm() and returning to userspace. + * + * The below provides this either through switch_mm(), or in + * case 'prev->active_mm == next->mm' through + * finish_task_switch()'s mmdrop(). + */ + switch_mm_irqs_off(prev->active_mm, next->mm, next); + + if (!prev->mm) { // from kernel + /* will mmdrop() in finish_task_switch(). */ + rq->prev_mm = prev->active_mm; + prev->active_mm = NULL; + } + } + + prepare_lock_switch(rq, next); + + /* Here we just switch the register state and the stack. */ + switch_to(prev, next, prev); + barrier(); + + return finish_task_switch(prev); +} + +/* + * nr_running, nr_uninterruptible and nr_context_switches: + * + * externally visible scheduler statistics: current number of runnable + * threads, total number of context switches performed since bootup. + */ +unsigned long nr_running(void) +{ + unsigned long i, sum = 0; + + for_each_online_cpu(i) + sum += cpu_rq(i)->nr_running; + + return sum; +} + +/* + * Check if only the current task is running on the CPU. + * + * Caution: this function does not check that the caller has disabled + * preemption, thus the result might have a time-of-check-to-time-of-use + * race. The caller is responsible to use it correctly, for example: + * + * - from a non-preemptible section (of course) + * + * - from a thread that is bound to a single CPU + * + * - in a loop with very short iterations (e.g. 
a polling loop)
+ */
+bool single_task_running(void)
+{
+	return raw_rq()->nr_running == 1;
+}
+EXPORT_SYMBOL(single_task_running);
+
+unsigned long long nr_context_switches(void)
+{
+	int i;
+	unsigned long long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += cpu_rq(i)->nr_switches;
+
+	return sum;
+}
+
+/*
+ * Consumers of these two interfaces, like for example the cpuidle menu
+ * governor, are using nonsensical data: they prefer shallow idle state
+ * selection for a CPU that has IO-wait, even though that CPU might not
+ * end up running the task once it becomes runnable.
+ */
+
+unsigned long nr_iowait_cpu(int cpu)
+{
+	return atomic_read(&cpu_rq(cpu)->nr_iowait);
+}
+
+/*
+ * IO-wait accounting, and how it's mostly bollocks (on SMP).
+ *
+ * The idea behind IO-wait accounting is to account the idle time that we could
+ * have spent running if it were not for IO. That is, if we were to improve the
+ * storage performance, we'd have a proportional reduction in IO-wait time.
+ *
+ * This all works nicely on UP, where, when a task blocks on IO, we account
+ * idle time as IO-wait, because if the storage were faster, it could've been
+ * running and we'd not be idle.
+ *
+ * This has been extended to SMP, by doing the same for each CPU. This however
+ * is broken.
+ *
+ * Imagine for instance the case where two tasks block on one CPU, only that
+ * one CPU will have IO-wait accounted, while the other has regular idle. Even
+ * though, if the storage were faster, both could've run at the same time,
+ * utilising both CPUs.
+ *
+ * This means that, when looking globally, the current IO-wait accounting on
+ * SMP is a lower bound, because of under-accounting.
+ *
+ * Worse, since the numbers are provided per CPU, they are sometimes
+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly
+ * associated with any one particular CPU, it can wake up on a different CPU
+ * than the one it blocked on. This means the per CPU IO-wait number is
+ * meaningless.
+ *
+ * Task CPU affinities can make all that even more 'interesting'.
+ */
+
+unsigned long nr_iowait(void)
+{
+	unsigned long i, sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += nr_iowait_cpu(i);
+
+	return sum;
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache
+ * footprint.
+ */
+void sched_exec(void)
+{
+	struct task_struct *p = current;
+	int dest_cpu;
+
+	if (task_rq(p)->nr_running < 2)
+		return;
+
+	dest_cpu = cpumask_any_and(p->cpus_ptr, &sched_rq_watermark[IDLE_WM]);
+	if (dest_cpu < nr_cpu_ids) {
+#ifdef CONFIG_SCHED_SMT
+		int smt = cpumask_any_and(p->cpus_ptr, &sched_sg_idle_mask);
+		if (smt < nr_cpu_ids)
+			dest_cpu = smt;
+#endif
+		if (likely(cpu_active(dest_cpu))) {
+			struct migration_arg arg = { p, dest_cpu };
+
+			stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+			return;
+		}
+	}
+}
+
+#endif
+
+DEFINE_PER_CPU(struct kernel_stat, kstat);
+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
+
+EXPORT_PER_CPU_SYMBOL(kstat);
+EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
+
+static inline void update_curr(struct rq *rq, struct task_struct *p)
+{
+	s64 ns = rq->clock_task - p->last_ran;
+
+	p->sched_time += ns;
+	account_group_exec_runtime(p, ns);
+
+	p->time_slice -= ns;
+	p->last_ran = rq->clock_task;
+}
+
+/*
+ * Return accounted runtime for the task.
+ * Return separately the current's pending runtime that has not been
+ * accounted yet.
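+ *
+ * An illustrative consumer (not introduced by this code): thread
+ * CPU-clock readers such as clock_gettime(CLOCK_THREAD_CPUTIME_ID)
+ * want exactly this sum of already-accounted runtime plus the
+ * still-unaccounted delta of a currently running task.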
+ */
+unsigned long long task_sched_runtime(struct task_struct *p)
+{
+	unsigned long flags;
+	struct rq *rq;
+	raw_spinlock_t *lock;
+	u64 ns;
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+	/*
+	 * 64-bit doesn't need locks to atomically read a 64-bit value.
+	 * So we have an optimization chance when the task's delta_exec is 0.
+	 * Reading ->on_cpu is racy, but this is ok.
+	 *
+	 * If we race with it leaving CPU, we'll take a lock. So we're correct.
+	 * If we race with it entering CPU, unaccounted time is 0. This is
+	 * indistinguishable from the read occurring a few cycles earlier.
+	 * If we see ->on_cpu without ->on_rq, the task is leaving, and has
+	 * been accounted, so we're correct here as well.
+	 */
+	if (!p->on_cpu || !task_on_rq_queued(p))
+		return tsk_seruntime(p);
+#endif
+
+	rq = task_access_lock_irqsave(p, &lock, &flags);
+	/*
+	 * Must be ->curr _and_ ->on_rq. If dequeued, we would
+	 * project cycles that may never be accounted to this
+	 * thread, breaking clock_gettime().
+	 */
+	if (p == rq->curr && task_on_rq_queued(p)) {
+		update_rq_clock(rq);
+		update_curr(rq, p);
+	}
+	ns = tsk_seruntime(p);
+	task_access_unlock_irqrestore(p, lock, &flags);
+
+	return ns;
+}
+
+/* This manages tasks that have run out of timeslice during a scheduler_tick */
+static inline void scheduler_task_tick(struct rq *rq)
+{
+	struct task_struct *p = rq->curr;
+
+	if (is_idle_task(p))
+		return;
+
+	update_curr(rq, p);
+	cpufreq_update_util(rq, 0);
+
+	/*
+	 * Tasks that have less than RESCHED_NS of time slice left will be
+	 * rescheduled.
+	 */
+	if (p->time_slice >= RESCHED_NS)
+		return;
+	set_tsk_need_resched(p);
+	set_preempt_need_resched();
+}
+
+/*
+ * This function gets called by the timer code, with HZ frequency.
+ * We call it with interrupts disabled.
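+ *
+ * In outline, the body below does (a minimal sketch of the code that
+ * follows, nothing more):
+ *
+ *	raw_spin_lock(&rq->lock);
+ *	update_rq_clock(rq);
+ *	scheduler_task_tick(rq);	(time slice accounting, resched)
+ *	calc_global_load_tick(rq);
+ *	raw_spin_unlock(&rq->lock);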
+ */
+void scheduler_tick(void)
+{
+	int cpu __maybe_unused = smp_processor_id();
+	struct rq *rq = cpu_rq(cpu);
+
+	sched_clock_tick();
+
+	raw_spin_lock(&rq->lock);
+	update_rq_clock(rq);
+
+	scheduler_task_tick(rq);
+	calc_global_load_tick(rq);
+	psi_task_tick(rq);
+
+	rq->last_tick = rq->clock;
+	raw_spin_unlock(&rq->lock);
+
+	perf_event_task_tick();
+}
+
+#ifdef CONFIG_SCHED_SMT
+static inline int active_load_balance_cpu_stop(void *data)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *p = data;
+	cpumask_t tmp;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	raw_spin_lock(&p->pi_lock);
+	raw_spin_lock(&rq->lock);
+
+	rq->active_balance = 0;
+	/* _something_ may have changed the task, double check again */
+	if (task_on_rq_queued(p) && task_rq(p) == rq &&
+	    cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask)) {
+		int cpu = cpu_of(rq);
+		int dcpu = __best_mask_cpu(cpu, &tmp,
+					   per_cpu(sched_cpu_llc_mask, cpu));
+		rq = move_queued_task(rq, p, dcpu);
+	}
+
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock(&p->pi_lock);
+
+	local_irq_restore(flags);
+
+	return 0;
+}
+
+/* sg_balance_trigger - trigger sibling group balance for @cpu */
+static inline int sg_balance_trigger(const int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+	struct task_struct *curr;
+	int res;
+
+	if (!raw_spin_trylock_irqsave(&rq->lock, flags))
+		return 0;
+	curr = rq->curr;
+	res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&
+	      cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&
+	      (!rq->active_balance);
+
+	if (res)
+		rq->active_balance = 1;
+
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	if (res)
+		stop_one_cpu_nowait(cpu, active_load_balance_cpu_stop,
+				    curr, &rq->active_balance_work);
+	return res;
+}
+
+/*
+ * sg_balance_check - sibling group balance check for run queue @rq
+ */
+static inline void sg_balance_check(struct rq *rq)
+{
+	cpumask_t chk;
+	int cpu;
+
+	/* exit when no sibling group is idle */
+	if (cpumask_empty(&sched_sg_idle_mask))
+		return;
+
+	cpu = cpu_of(rq);
+	/*
+	 * Only a cpu in the sibling idle group will do the checking and
+	 * then find potential cpus which the current running task can be
+	 * migrated to
+	 */
+	if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) &&
+	    cpumask_andnot(&chk, cpu_online_mask, &sched_rq_pending_mask) &&
+	    cpumask_andnot(&chk, &chk, &sched_rq_watermark[IDLE_WM])) {
+		int i, tried = 0;
+
+		for_each_cpu_wrap(i, &chk, cpu) {
+			if (cpumask_subset(cpu_smt_mask(i), &chk)) {
+				if (sg_balance_trigger(i))
+					return;
+				if (tried)
+					return;
+				tried++;
+			}
+		}
+	}
+}
+#endif /* CONFIG_SCHED_SMT */
+
+#ifdef CONFIG_NO_HZ_FULL
+
+struct tick_work {
+	int			cpu;
+	atomic_t		state;
+	struct delayed_work	work;
+};
+/* Values for ->state, see diagram below. */
+#define TICK_SCHED_REMOTE_OFFLINE	0
+#define TICK_SCHED_REMOTE_OFFLINING	1
+#define TICK_SCHED_REMOTE_RUNNING	2
+
+/*
+ * State diagram for ->state:
+ *
+ *
+ *          TICK_SCHED_REMOTE_OFFLINE
+ *                    |   ^
+ *                    |   |
+ *                    |   | sched_tick_remote()
+ *                    |   |
+ *                    |   |
+ *                    +--TICK_SCHED_REMOTE_OFFLINING
+ *                    |   ^
+ *                    |   |
+ * sched_tick_start() |   | sched_tick_stop()
+ *                    |   |
+ *                    V   |
+ *          TICK_SCHED_REMOTE_RUNNING
+ *
+ *
+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote()
+ * and sched_tick_start() are happy to leave the state in RUNNING.
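+ *
+ * In the code below these transitions map to (a summary of the
+ * behaviour as coded, nothing more):
+ *
+ *	sched_tick_start():	atomic_xchg(&state, RUNNING)
+ *	sched_tick_remote():	atomic_fetch_add_unless(&state, -1, RUNNING),
+ *				requeues itself only while state == RUNNING
+ *	sched_tick_stop():	cancel_delayed_work_sync() on the work item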
+ */ + +static struct tick_work __percpu *tick_work_cpu; + +static void sched_tick_remote(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct tick_work *twork = container_of(dwork, struct tick_work, work); + int cpu = twork->cpu; + struct rq *rq = cpu_rq(cpu); + struct task_struct *curr; + unsigned long flags; + u64 delta; + int os; + + /* + * Handle the tick only if it appears the remote CPU is running in full + * dynticks mode. The check is racy by nature, but missing a tick or + * having one too much is no big deal because the scheduler tick updates + * statistics and checks timeslices in a time-independent way, regardless + * of when exactly it is running. + */ + if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) + goto out_requeue; + + raw_spin_lock_irqsave(&rq->lock, flags); + curr = rq->curr; + + if (is_idle_task(curr) || cpu_is_offline(cpu)) + goto out_unlock; + + update_rq_clock(rq); + delta = rq_clock_task(rq) - curr->last_ran; + + /* + * Make sure the next tick runs within a reasonable + * amount of time. + */ + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + scheduler_task_tick(rq); + +out_unlock: + raw_spin_unlock_irqrestore(&rq->lock, flags); + +out_requeue: + /* + * Run the remote tick once per second (1Hz). This arbitrary + * frequency is large enough to avoid overload but short enough + * to keep scheduler internal stats reasonably up to date. But + * first update state to reflect hotplug activity if required. + */ + os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); + WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); + if (os == TICK_SCHED_REMOTE_RUNNING) + queue_delayed_work(system_unbound_wq, dwork, HZ); +} + +static void sched_tick_start(int cpu) +{ + int os; + struct tick_work *twork; + + if (housekeeping_cpu(cpu, HK_FLAG_TICK)) + return; + + WARN_ON_ONCE(!tick_work_cpu); + + twork = per_cpu_ptr(tick_work_cpu, cpu); + os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); + WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); + if (os == TICK_SCHED_REMOTE_OFFLINE) { + twork->cpu = cpu; + INIT_DELAYED_WORK(&twork->work, sched_tick_remote); + queue_delayed_work(system_unbound_wq, &twork->work, HZ); + } +} + +#ifdef CONFIG_HOTPLUG_CPU +static void sched_tick_stop(int cpu) +{ + struct tick_work *twork; + + if (housekeeping_cpu(cpu, HK_FLAG_TICK)) + return; + + WARN_ON_ONCE(!tick_work_cpu); + + twork = per_cpu_ptr(tick_work_cpu, cpu); + cancel_delayed_work_sync(&twork->work); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +int __init sched_tick_offload_init(void) +{ + tick_work_cpu = alloc_percpu(struct tick_work); + BUG_ON(!tick_work_cpu); + return 0; +} + +#else /* !CONFIG_NO_HZ_FULL */ +static inline void sched_tick_start(int cpu) { } +static inline void sched_tick_stop(int cpu) { } +#endif + +#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ + defined(CONFIG_PREEMPT_TRACER)) +/* + * If the value passed in is equal to the current preempt count + * then we just disabled preemption. Start timing the latency. + */ +static inline void preempt_latency_start(int val) +{ + if (preempt_count() == val) { + unsigned long ip = get_lock_parent_ip(); +#ifdef CONFIG_DEBUG_PREEMPT + current->preempt_disable_ip = ip; +#endif + trace_preempt_off(CALLER_ADDR0, ip); + } +} + +void preempt_count_add(int val) +{ +#ifdef CONFIG_DEBUG_PREEMPT + /* + * Underflow? 
+	 */
+	if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
+		return;
+#endif
+	__preempt_count_add(val);
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Spinlock count overflowing soon?
+	 */
+	DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >=
+				PREEMPT_MASK - 10);
+#endif
+	preempt_latency_start(val);
+}
+EXPORT_SYMBOL(preempt_count_add);
+NOKPROBE_SYMBOL(preempt_count_add);
+
+/*
+ * If the value passed in is equal to the current preempt count
+ * then we just enabled preemption. Stop timing the latency.
+ */
+static inline void preempt_latency_stop(int val)
+{
+	if (preempt_count() == val)
+		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+}
+
+void preempt_count_sub(int val)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Underflow?
+	 */
+	if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+		return;
+	/*
+	 * Is the spinlock portion underflowing?
+	 */
+	if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
+			!(preempt_count() & PREEMPT_MASK)))
+		return;
+#endif
+
+	preempt_latency_stop(val);
+	__preempt_count_sub(val);
+}
+EXPORT_SYMBOL(preempt_count_sub);
+NOKPROBE_SYMBOL(preempt_count_sub);
+
+#else
+static inline void preempt_latency_start(int val) { }
+static inline void preempt_latency_stop(int val) { }
+#endif
+
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	return p->preempt_disable_ip;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Print scheduling while atomic bug:
+ */
+static noinline void __schedule_bug(struct task_struct *prev)
+{
+	/* Save this before calling printk(), since that will clobber it */
+	unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
+
+	if (oops_in_progress)
+		return;
+
+	printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n",
+	       prev->comm, prev->pid, preempt_count());
+
+	debug_show_held_locks(prev);
+	print_modules();
+	if (irqs_disabled())
+		print_irqtrace_events(prev);
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+	    && in_atomic_preempt_off()) {
+		pr_err("Preemption disabled at:");
+		print_ip_sym(preempt_disable_ip);
+		pr_cont("\n");
+	}
+	if (panic_on_warn)
+		panic("scheduling while atomic\n");
+
+	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+
+/*
+ * Various schedule()-time debugging checks and statistics:
+ */
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
+{
+#ifdef CONFIG_SCHED_STACK_END_CHECK
+	if (task_stack_end_corrupted(prev))
+		panic("corrupted stack end detected inside scheduler\n");
+#endif
+
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	if (!preempt && prev->state && prev->non_block_count) {
+		printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
+			prev->comm, prev->pid, prev->non_block_count);
+		dump_stack();
+		add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+	}
+#endif
+
+	if (unlikely(in_atomic_preempt_off())) {
+		__schedule_bug(prev);
+		preempt_count_set(PREEMPT_DISABLED);
+	}
+	rcu_sleep_check();
+
+	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
+
+	schedstat_inc(this_rq()->sched_count);
+}
+
+#ifdef CONFIG_SMP
+
+#define SCHED_RQ_NR_MIGRATION (32UL)
+/*
+ * Migrate pending tasks in @rq to @dest_cpu.
+ * Will try to migrate at most min(@rq->nr_running / 2,
+ * SCHED_RQ_NR_MIGRATION) tasks to @dest_cpu.
+ */
+static inline int
+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu)
+{
+	struct task_struct *p, *skip = rq->curr;
+	int nr_migrated = 0;
+	int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION);
+
+	while (skip != rq->idle && nr_tries &&
+	       (p = rq_next_bmq_task(skip, rq)) != rq->idle)
{ + skip = rq_next_bmq_task(p, rq); + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { + dequeue_task(p, rq, 0); + set_task_cpu(p, dest_cpu); + enqueue_task(p, dest_rq, 0); + nr_migrated++; + } + nr_tries--; + } + + return nr_migrated; +} + +static inline int take_other_rq_tasks(struct rq *rq, int cpu) +{ + struct cpumask *affinity_mask, *end_mask; + + if (cpumask_empty(&sched_rq_pending_mask)) + return 0; + + affinity_mask = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]); + end_mask = per_cpu(sched_cpu_affinity_end_mask, cpu); + do { + int i; + for_each_cpu_and(i, &sched_rq_pending_mask, affinity_mask) { + int nr_migrated; + struct rq *src_rq; + + src_rq = cpu_rq(i); + if (!do_raw_spin_trylock(&src_rq->lock)) + continue; + spin_acquire(&src_rq->lock.dep_map, + SINGLE_DEPTH_NESTING, 1, _RET_IP_); + + nr_migrated = migrate_pending_tasks(src_rq, rq, cpu); + + spin_release(&src_rq->lock.dep_map, _RET_IP_); + do_raw_spin_unlock(&src_rq->lock); + + if (nr_migrated) { + cpufreq_update_util(rq, 0); + return 1; + } + } + } while (++affinity_mask < end_mask); + + return 0; +} +#endif + +/* + * Timeslices below RESCHED_NS are considered as good as expired as there's no + * point rescheduling when there's so little time left. + */ +static inline void check_curr(struct task_struct *p, struct rq *rq) +{ + if (rq->idle == p) + return; + + update_curr(rq, p); + + if (p->time_slice < RESCHED_NS) { + p->time_slice = sched_timeslice_ns; + if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { + if (SCHED_RR != p->policy) + deboost_task(p); + requeue_task(p, rq); + } + } +} + +static inline struct task_struct * +choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) +{ + struct task_struct *next; + + if (unlikely(rq->skip)) { + next = rq_runnable_task(rq); +#ifdef CONFIG_SMP + if (likely(rq->online)) + if (next == rq->idle && take_other_rq_tasks(rq, cpu)) + next = rq_runnable_task(rq); +#endif + rq->skip = NULL; + return next; + } + + next = rq_first_bmq_task(rq); +#ifdef CONFIG_SMP + if (likely(rq->online)) + if (next == rq->idle && take_other_rq_tasks(rq, cpu)) + return rq_first_bmq_task(rq); +#endif + return next; +} + +static inline void set_rq_task(struct rq *rq, struct task_struct *p) +{ + p->last_ran = rq->clock_task; + + if (unlikely(sched_timeslice_ns == p->time_slice)) + rq->last_ts_switch = rq->clock; +#ifdef CONFIG_HIGH_RES_TIMERS + if (p != rq->idle) + hrtick_start(rq, p->time_slice); +#endif +} + +/* + * schedule() is the main scheduler function. + * + * The main means of driving the scheduler and thus entering this function are: + * + * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. + * + * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return + * paths. For example, see arch/x86/entry_64.S. + * + * To drive preemption between tasks, the scheduler sets the flag in timer + * interrupt handler scheduler_tick(). + * + * 3. Wakeups don't really cause entry into schedule(). They add a + * task to the run-queue and that's it. + * + * Now, if the new task added to the run-queue preempts the current + * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets + * called on the nearest possible occasion: + * + * - If the kernel is preemptible (CONFIG_PREEMPTION=y): + * + * - in syscall or exception context, at the next outmost + * preempt_enable(). (this might be as soon as the wake_up()'s + * spin_unlock()!) 
+ * + * - in IRQ context, return from interrupt-handler to + * preemptible context + * + * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) + * then at the next: + * + * - cond_resched() call + * - explicit schedule() call + * - return from syscall or exception to user-space + * - return from interrupt-handler to user-space + * + * WARNING: must be called with preemption disabled! + */ +static void __sched notrace __schedule(bool preempt) +{ + struct task_struct *prev, *next; + unsigned long *switch_count; + struct rq *rq; + int cpu; + + cpu = smp_processor_id(); + rq = cpu_rq(cpu); + prev = rq->curr; + + schedule_debug(prev, preempt); + + /* by passing sched_feat(HRTICK) checking which BMQ doesn't support */ + hrtick_clear(rq); + + local_irq_disable(); + rcu_note_context_switch(preempt); + + /* + * Make sure that signal_pending_state()->signal_pending() below + * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) + * done by the caller to avoid the race with signal_wake_up(). + * + * The membarrier system call requires a full memory barrier + * after coming from user-space, before storing to rq->curr. + */ + raw_spin_lock(&rq->lock); + smp_mb__after_spinlock(); + + update_rq_clock(rq); + + switch_count = &prev->nivcsw; + if (!preempt && prev->state) { + if (signal_pending_state(prev->state, prev)) { + prev->state = TASK_RUNNING; + } else { + if (rq_switch_time(rq) < boost_threshold(prev)) + boost_task(prev); + deactivate_task(prev, rq); + + if (prev->in_iowait) { + atomic_inc(&rq->nr_iowait); + delayacct_blkio_start(); + } + } + switch_count = &prev->nvcsw; + } + + clear_tsk_need_resched(prev); + clear_preempt_need_resched(); + + check_curr(prev, rq); + + next = choose_next_task(rq, cpu, prev); + + set_rq_task(rq, next); + + if (prev != next) { + if (MAX_PRIO == next->prio) + schedstat_inc(rq->sched_goidle); + + /* + * RCU users of rcu_dereference(rq->curr) may not see + * changes to task_struct made by pick_next_task(). + */ + RCU_INIT_POINTER(rq->curr, next); + /* + * The membarrier system call requires each architecture + * to have a full memory barrier after updating + * rq->curr, before returning to user-space. + * + * Here are the schemes providing that barrier on the + * various architectures: + * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. + * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. + * - finish_lock_switch() for weakly-ordered + * architectures where spin_unlock is a full barrier, + * - switch_to() for arm64 (weakly-ordered, spin_unlock + * is a RELEASE barrier), + */ + ++*switch_count; + rq->nr_switches++; + rq->last_ts_switch = rq->clock; + + trace_sched_switch(preempt, prev, next); + + /* Also unlocks the rq: */ + rq = context_switch(rq, prev, next); +#ifdef CONFIG_SCHED_SMT + sg_balance_check(rq); +#endif + } else + raw_spin_unlock_irq(&rq->lock); +} + +void __noreturn do_task_dead(void) +{ + /* Causes final put_task_struct in finish_task_switch(): */ + set_special_state(TASK_DEAD); + + /* Tell freezer to ignore us: */ + current->flags |= PF_NOFREEZE; + + __schedule(false); + BUG(); + + /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ + for (;;) + cpu_relax(); +} + +static inline void sched_submit_work(struct task_struct *tsk) +{ + if (!tsk->state) + return; + + /* + * If a worker went to sleep, notify and ask workqueue whether + * it wants to wake up a task to maintain concurrency. 
+ * As this function is called inside the schedule() context, + * we disable preemption to avoid it calling schedule() again + * in the possible wakeup of a kworker. + */ + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + preempt_disable(); + if (tsk->flags & PF_WQ_WORKER) + wq_worker_sleeping(tsk); + else + io_wq_worker_sleeping(tsk); + preempt_enable_no_resched(); + } + + if (tsk_is_pi_blocked(tsk)) + return; + + /* + * If we are going to sleep and we have plugged IO queued, + * make sure to submit it to avoid deadlocks. + */ + if (blk_needs_flush_plug(tsk)) + blk_schedule_flush_plug(tsk); +} + +static void sched_update_worker(struct task_struct *tsk) +{ + if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { + if (tsk->flags & PF_WQ_WORKER) + wq_worker_running(tsk); + else + io_wq_worker_running(tsk); + } +} + +asmlinkage __visible void __sched schedule(void) +{ + struct task_struct *tsk = current; + + sched_submit_work(tsk); + do { + preempt_disable(); + __schedule(false); + sched_preempt_enable_no_resched(); + } while (need_resched()); + sched_update_worker(tsk); +} +EXPORT_SYMBOL(schedule); + +/* + * synchronize_rcu_tasks() makes sure that no task is stuck in preempted + * state (have scheduled out non-voluntarily) by making sure that all + * tasks have either left the run queue or have gone into user space. + * As idle tasks do not do either, they must not ever be preempted + * (schedule out non-voluntarily). + * + * schedule_idle() is similar to schedule_preempt_disable() except that it + * never enables preemption because it does not call sched_submit_work(). + */ +void __sched schedule_idle(void) +{ + /* + * As this skips calling sched_submit_work(), which the idle task does + * regardless because that function is a nop when the task is in a + * TASK_RUNNING state, make sure this isn't used someplace that the + * current task can be in any other state. Note, idle is always in the + * TASK_RUNNING state. + */ + WARN_ON_ONCE(current->state); + do { + __schedule(false); + } while (need_resched()); +} + +#ifdef CONFIG_CONTEXT_TRACKING +asmlinkage __visible void __sched schedule_user(void) +{ + /* + * If we come here after a random call to set_need_resched(), + * or we have been woken up remotely but the IPI has not yet arrived, + * we haven't yet exited the RCU idle mode. Do it here manually until + * we find a better solution. + * + * NB: There are buggy callers of this function. Ideally we + * should warn if prev_state != CONTEXT_USER, but that will trigger + * too frequently to make sense yet. + */ + enum ctx_state prev_state = exception_enter(); + schedule(); + exception_exit(prev_state); +} +#endif + +/** + * schedule_preempt_disabled - called with preemption disabled + * + * Returns with preemption disabled. Note: preempt_count must be 1 + */ +void __sched schedule_preempt_disabled(void) +{ + sched_preempt_enable_no_resched(); + schedule(); + preempt_disable(); +} + +static void __sched notrace preempt_schedule_common(void) +{ + do { + /* + * Because the function tracer can trace preempt_count_sub() + * and it also uses preempt_enable/disable_notrace(), if + * NEED_RESCHED is set, the preempt_enable_notrace() called + * by the function tracer will call this function again and + * cause infinite recursion. + * + * Preemption must be disabled here before the function + * tracer can trace. Break up preempt_disable() into two + * calls. One to disable preemption without fear of being + * traced. 
The other to still record the preemption latency, + * which can also be traced by the function tracer. + */ + preempt_disable_notrace(); + preempt_latency_start(1); + __schedule(true); + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + + /* + * Check again in case we missed a preemption opportunity + * between schedule and now. + */ + } while (need_resched()); +} + +#ifdef CONFIG_PREEMPTION +/* + * This is the entry point to schedule() from in-kernel preemption + * off of preempt_enable. + */ +asmlinkage __visible void __sched notrace preempt_schedule(void) +{ + /* + * If there is a non-zero preempt_count or interrupts are disabled, + * we do not want to preempt the current task. Just return.. + */ + if (likely(!preemptible())) + return; + + preempt_schedule_common(); +} +NOKPROBE_SYMBOL(preempt_schedule); +EXPORT_SYMBOL(preempt_schedule); + +/** + * preempt_schedule_notrace - preempt_schedule called by tracing + * + * The tracing infrastructure uses preempt_enable_notrace to prevent + * recursion and tracing preempt enabling caused by the tracing + * infrastructure itself. But as tracing can happen in areas coming + * from userspace or just about to enter userspace, a preempt enable + * can occur before user_exit() is called. This will cause the scheduler + * to be called when the system is still in usermode. + * + * To prevent this, the preempt_enable_notrace will use this function + * instead of preempt_schedule() to exit user context if needed before + * calling the scheduler. + */ +asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) +{ + enum ctx_state prev_ctx; + + if (likely(!preemptible())) + return; + + do { + /* + * Because the function tracer can trace preempt_count_sub() + * and it also uses preempt_enable/disable_notrace(), if + * NEED_RESCHED is set, the preempt_enable_notrace() called + * by the function tracer will call this function again and + * cause infinite recursion. + * + * Preemption must be disabled here before the function + * tracer can trace. Break up preempt_disable() into two + * calls. One to disable preemption without fear of being + * traced. The other to still record the preemption latency, + * which can also be traced by the function tracer. + */ + preempt_disable_notrace(); + preempt_latency_start(1); + /* + * Needs preempt disabled in case user_exit() is traced + * and the tracer calls preempt_enable_notrace() causing + * an infinite recursion. + */ + prev_ctx = exception_enter(); + __schedule(true); + exception_exit(prev_ctx); + + preempt_latency_stop(1); + preempt_enable_no_resched_notrace(); + } while (need_resched()); +} +EXPORT_SYMBOL_GPL(preempt_schedule_notrace); + +#endif /* CONFIG_PREEMPTION */ + +/* + * This is the entry point to schedule() from kernel preemption + * off of irq context. + * Note, that this is called and return with irqs disabled. This will + * protect us against recursive calling from irq. 
+ */ +asmlinkage __visible void __sched preempt_schedule_irq(void) +{ + enum ctx_state prev_state; + + /* Catch callers which need to be fixed */ + BUG_ON(preempt_count() || !irqs_disabled()); + + prev_state = exception_enter(); + + do { + preempt_disable(); + local_irq_enable(); + __schedule(true); + local_irq_disable(); + sched_preempt_enable_no_resched(); + } while (need_resched()); + + exception_exit(prev_state); +} + +int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, + void *key) +{ + return try_to_wake_up(curr->private, mode, wake_flags); +} +EXPORT_SYMBOL(default_wake_function); + +static inline void check_task_changed(struct rq *rq, struct task_struct *p) +{ + /* Trigger resched if task sched_prio has been modified. */ + if (task_on_rq_queued(p) && task_sched_prio(p) != p->bmq_idx) { + requeue_task(p, rq); + check_preempt_curr(rq); + } +} + +#ifdef CONFIG_RT_MUTEXES + +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) +{ + if (pi_task) + prio = min(prio, pi_task->prio); + + return prio; +} + +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + struct task_struct *pi_task = rt_mutex_get_top_task(p); + + return __rt_effective_prio(pi_task, prio); +} + +/* + * rt_mutex_setprio - set the current priority of a task + * @p: task to boost + * @pi_task: donor task + * + * This function changes the 'effective' priority of a task. It does + * not touch ->normal_prio like __setscheduler(). + * + * Used by the rt_mutex code to implement priority inheritance + * logic. Call site only calls if the priority of the task changed. + */ +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) +{ + int prio; + struct rq *rq; + raw_spinlock_t *lock; + + /* XXX used to be waiter->prio, not waiter->task->prio */ + prio = __rt_effective_prio(pi_task, p->normal_prio); + + /* + * If nothing changed; bail early. + */ + if (p->pi_top_task == pi_task && prio == p->prio) + return; + + rq = __task_access_lock(p, &lock); + /* + * Set under pi_lock && rq->lock, such that the value can be used under + * either lock. + * + * Note that there is loads of tricky to make this pointer cache work + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to + * ensure a task is de-boosted (pi_task is set to NULL) before the + * task is allowed to run again (and can exit). This ensures the pointer + * points to a blocked task -- which guaratees the task is present. + */ + p->pi_top_task = pi_task; + + /* + * For FIFO/RR we only need to set prio, if that matches we're done. + */ + if (prio == p->prio) + goto out_unlock; + + /* + * Idle task boosting is a nono in general. There is one + * exception, when PREEMPT_RT and NOHZ is active: + * + * The idle task calls get_next_timer_interrupt() and holds + * the timer wheel base->lock on the CPU and another CPU wants + * to access the timer (probably to cancel it). We can safely + * ignore the boosting request, as the idle CPU runs this code + * with interrupts disabled and will complete the lock + * protected section without being interrupted. So there is no + * real need to boost. 
+ */ + if (unlikely(p == rq->idle)) { + WARN_ON(p != rq->curr); + WARN_ON(p->pi_blocked_on); + goto out_unlock; + } + + trace_sched_pi_setprio(p, pi_task); + p->prio = prio; + + check_task_changed(rq, p); +out_unlock: + __task_access_unlock(p, lock); +} +#else +static inline int rt_effective_prio(struct task_struct *p, int prio) +{ + return prio; +} +#endif + +void set_user_nice(struct task_struct *p, long nice) +{ + unsigned long flags; + struct rq *rq; + raw_spinlock_t *lock; + + if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) + return; + /* + * We have to be careful, if called from sys_setpriority(), + * the task might be in the middle of scheduling on another CPU. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); + rq = __task_access_lock(p, &lock); + + p->static_prio = NICE_TO_PRIO(nice); + /* + * The RT priorities are set via sched_setscheduler(), but we still + * allow the 'normal' nice value to be set - but as expected + * it wont have any effect on scheduling until the task is + * not SCHED_NORMAL/SCHED_BATCH: + */ + if (task_has_rt_policy(p)) + goto out_unlock; + + p->prio = effective_prio(p); + check_task_changed(rq, p); +out_unlock: + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); +} +EXPORT_SYMBOL(set_user_nice); + +/* + * can_nice - check if a task can reduce its nice value + * @p: task + * @nice: nice value + */ +int can_nice(const struct task_struct *p, const int nice) +{ + /* Convert nice value [19,-20] to rlimit style value [1,40] */ + int nice_rlim = nice_to_rlimit(nice); + + return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || + capable(CAP_SYS_NICE)); +} + +#ifdef __ARCH_WANT_SYS_NICE + +/* + * sys_nice - change the priority of the current process. + * @increment: priority increment + * + * sys_setpriority is a more generic, but much slower function that + * does similar things. + */ +SYSCALL_DEFINE1(nice, int, increment) +{ + long nice, retval; + + /* + * Setpriority might change our priority at the same moment. + * We don't have to worry. Conceptually one call occurs first + * and we have a single winner. + */ + + increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); + nice = task_nice(current) + increment; + + nice = clamp_val(nice, MIN_NICE, MAX_NICE); + if (increment < 0 && !can_nice(current, nice)) + return -EPERM; + + retval = security_task_setnice(current, nice); + if (retval) + return retval; + + set_user_nice(current, nice); + return 0; +} + +#endif + +/** + * task_prio - return the priority value of a given task. + * @p: the task in question. + * + * Return: The priority value as seen by users in /proc. + * RT tasks are offset by -100. Normal tasks are centered around 1, value goes + * from 0(SCHED_ISO) up to 82 (nice +19 SCHED_IDLE). + */ +int task_prio(const struct task_struct *p) +{ + if (p->prio < MAX_RT_PRIO) + return (p->prio - MAX_RT_PRIO); + return (p->prio - MAX_RT_PRIO + p->boost_prio); +} + +/** + * idle_cpu - is a given CPU idle currently? + * @cpu: the processor in question. + * + * Return: 1 if the CPU is currently idle. 0 otherwise. + */ +int idle_cpu(int cpu) +{ + return cpu_curr(cpu) == cpu_rq(cpu)->idle; +} + +/** + * idle_task - return the idle task for a given CPU. + * @cpu: the processor in question. + * + * Return: The idle task for the cpu @cpu. + */ +struct task_struct *idle_task(int cpu) +{ + return cpu_rq(cpu)->idle; +} + +/** + * find_process_by_pid - find a process with a matching PID value. + * @pid: the pid in question. + * + * The task of @pid, if found. %NULL otherwise. 
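+ *
+ * Illustrative behaviour (a note on the expression below, not new
+ * logic): find_process_by_pid(0) resolves to current, matching
+ * "pid ? find_task_by_vpid(pid) : current".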
+ */ +static inline struct task_struct *find_process_by_pid(pid_t pid) +{ + return pid ? find_task_by_vpid(pid) : current; +} + +/* + * sched_setparam() passes in -1 for its policy, to let the functions + * it calls know not to change it. + */ +#define SETPARAM_POLICY -1 + +static void __setscheduler_params(struct task_struct *p, + const struct sched_attr *attr) +{ + int policy = attr->sched_policy; + + if (policy == SETPARAM_POLICY) + policy = p->policy; + + p->policy = policy; + + /* + * allow normal nice value to be set, but will not have any + * effect on scheduling until the task not SCHED_NORMAL/ + * SCHED_BATCH + */ + p->static_prio = NICE_TO_PRIO(attr->sched_nice); + + /* + * __sched_setscheduler() ensures attr->sched_priority == 0 when + * !rt_policy. Always setting this ensures that things like + * getparam()/getattr() don't report silly values for !rt tasks. + */ + p->rt_priority = attr->sched_priority; + p->normal_prio = normal_prio(p); +} + +/* Actually do priority change: must hold rq lock. */ +static void __setscheduler(struct rq *rq, struct task_struct *p, + const struct sched_attr *attr, bool keep_boost) +{ + __setscheduler_params(p, attr); + + /* + * Keep a potential priority boosting if called from + * sched_setscheduler(). + */ + p->prio = normal_prio(p); + if (keep_boost) + p->prio = rt_effective_prio(p, p->prio); +} + +/* + * check the target process has a UID that matches the current process's + */ +static bool check_same_owner(struct task_struct *p) +{ + const struct cred *cred = current_cred(), *pcred; + bool match; + + rcu_read_lock(); + pcred = __task_cred(p); + match = (uid_eq(cred->euid, pcred->euid) || + uid_eq(cred->euid, pcred->uid)); + rcu_read_unlock(); + return match; +} + +static int __sched_setscheduler(struct task_struct *p, + const struct sched_attr *attr, + bool user, bool pi) +{ + const struct sched_attr dl_squash_attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_FIFO, + .sched_nice = 0, + .sched_priority = 99, + }; + int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; + int retval, oldpolicy = -1; + int policy = attr->sched_policy; + unsigned long flags; + struct rq *rq; + int reset_on_fork; + raw_spinlock_t *lock; + + /* The pi code expects interrupts enabled */ + BUG_ON(pi && in_interrupt()); + + /* + * BMQ supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO + */ + if (unlikely(SCHED_DEADLINE == policy)) { + attr = &dl_squash_attr; + policy = attr->sched_policy; + newprio = MAX_RT_PRIO - 1 - attr->sched_priority; + } +recheck: + /* Double check policy once rq lock held */ + if (policy < 0) { + reset_on_fork = p->sched_reset_on_fork; + policy = oldpolicy = p->policy; + } else { + reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); + + if (policy > SCHED_IDLE) + return -EINVAL; + } + + if (attr->sched_flags & ~(SCHED_FLAG_ALL)) + return -EINVAL; + + /* + * Valid priorities for SCHED_FIFO and SCHED_RR are + * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL and + * SCHED_BATCH and SCHED_IDLE is 0. 
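+	 *
+	 * For example (an illustration with hypothetical values):
+	 *
+	 *	struct sched_attr attr = {
+	 *		.size           = sizeof(attr),
+	 *		.sched_policy   = SCHED_FIFO,
+	 *		.sched_priority = 50,	(1..MAX_USER_RT_PRIO-1)
+	 *	};
+	 *
+	 * passes the checks below, while SCHED_FIFO with priority 0 or
+	 * SCHED_NORMAL with a non-zero priority is rejected with -EINVAL.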
+	 */
+	if (attr->sched_priority < 0 ||
+	    (p->mm && attr->sched_priority > MAX_USER_RT_PRIO - 1) ||
+	    (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1))
+		return -EINVAL;
+	if ((SCHED_RR == policy || SCHED_FIFO == policy) !=
+	    (attr->sched_priority != 0))
+		return -EINVAL;
+
+	/*
+	 * Allow unprivileged RT tasks to decrease priority:
+	 */
+	if (user && !capable(CAP_SYS_NICE)) {
+		if (SCHED_FIFO == policy || SCHED_RR == policy) {
+			unsigned long rlim_rtprio =
+				task_rlimit(p, RLIMIT_RTPRIO);
+
+			/* Can't set/change the rt policy */
+			if (policy != p->policy && !rlim_rtprio)
+				return -EPERM;
+
+			/* Can't increase priority */
+			if (attr->sched_priority > p->rt_priority &&
+			    attr->sched_priority > rlim_rtprio)
+				return -EPERM;
+		}
+
+		/* Can't change other user's priorities */
+		if (!check_same_owner(p))
+			return -EPERM;
+
+		/* Normal users shall not reset the sched_reset_on_fork flag */
+		if (p->sched_reset_on_fork && !reset_on_fork)
+			return -EPERM;
+	}
+
+	if (user) {
+		retval = security_task_setscheduler(p);
+		if (retval)
+			return retval;
+	}
+
+	if (pi)
+		cpuset_read_lock();
+
+	/*
+	 * Make sure no PI-waiters arrive (or leave) while we are
+	 * changing the priority of the task:
+	 */
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+
+	/*
+	 * To be able to change p->policy safely, task_access_lock()
+	 * must be called.
+	 * If task_access_lock() is used here:
+	 * For the task p which is not running, reading rq->stop is
+	 * racy but acceptable as ->stop doesn't change much.
+	 * An enhancement can be made to read rq->stop safely.
+	 */
+	rq = __task_access_lock(p, &lock);
+
+	/*
+	 * Changing the policy of the stop threads is a very bad idea
+	 */
+	if (p == rq->stop) {
+		retval = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * If not changing anything there's no need to proceed further:
+	 */
+	if (unlikely(policy == p->policy)) {
+		if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
+			goto change;
+		if (!rt_policy(policy) &&
+		    NICE_TO_PRIO(attr->sched_nice) != p->static_prio)
+			goto change;
+
+		p->sched_reset_on_fork = reset_on_fork;
+		retval = 0;
+		goto unlock;
+	}
+change:
+
+	/* Re-check policy now with rq lock held */
+	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
+		policy = oldpolicy = -1;
+		__task_access_unlock(p, lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		if (pi)
+			cpuset_read_unlock();
+		goto recheck;
+	}
+
+	p->sched_reset_on_fork = reset_on_fork;
+
+	if (pi) {
+		/*
+		 * Take priority boosted tasks into account. If the new
+		 * effective priority is unchanged, we just store the new
+		 * normal parameters and do not touch the scheduler class and
+		 * the runqueue. This will be done when the task deboosts
+		 * itself.
+ */ + if (rt_effective_prio(p, newprio) == p->prio) { + __setscheduler_params(p, attr); + retval = 0; + goto unlock; + } + } + + __setscheduler(rq, p, attr, pi); + + check_task_changed(rq, p); + + /* Avoid rq from going away on us: */ + preempt_disable(); + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + if (pi) { + cpuset_read_unlock(); + rt_mutex_adjust_pi(p); + } + + preempt_enable(); + + return 0; + +unlock: + __task_access_unlock(p, lock); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + if (pi) + cpuset_read_unlock(); + return retval; +} + +static int _sched_setscheduler(struct task_struct *p, int policy, + const struct sched_param *param, bool check) +{ + struct sched_attr attr = { + .sched_policy = policy, + .sched_priority = param->sched_priority, + .sched_nice = PRIO_TO_NICE(p->static_prio), + }; + + /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ + if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { + attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; + policy &= ~SCHED_RESET_ON_FORK; + attr.sched_policy = policy; + } + + return __sched_setscheduler(p, &attr, check, true); +} + +/** + * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. + * @p: the task in question. + * @policy: new policy. + * @param: structure containing the new RT priority. + * + * Return: 0 on success. An error code otherwise. + * + * NOTE that the task may be already dead. + */ +int sched_setscheduler(struct task_struct *p, int policy, + const struct sched_param *param) +{ + return _sched_setscheduler(p, policy, param, true); +} + +EXPORT_SYMBOL_GPL(sched_setscheduler); + +int sched_setattr(struct task_struct *p, const struct sched_attr *attr) +{ + return __sched_setscheduler(p, attr, true, true); +} +EXPORT_SYMBOL_GPL(sched_setattr); + +int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) +{ + return __sched_setscheduler(p, attr, false, true); +} + +/** + * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. + * @p: the task in question. + * @policy: new policy. + * @param: structure containing the new RT priority. + * + * Just like sched_setscheduler, only don't bother checking if the + * current context has permission. For example, this is needed in + * stop_machine(): we create temporary high priority worker threads, + * but our caller might not have that capability. + * + * Return: 0 on success. An error code otherwise. + */ +int sched_setscheduler_nocheck(struct task_struct *p, int policy, + const struct sched_param *param) +{ + return _sched_setscheduler(p, policy, param, false); +} +EXPORT_SYMBOL_GPL(sched_setscheduler_nocheck); + +static int +do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) +{ + struct sched_param lparam; + struct task_struct *p; + int retval; + + if (!param || pid < 0) + return -EINVAL; + if (copy_from_user(&lparam, param, sizeof(struct sched_param))) + return -EFAULT; + + rcu_read_lock(); + retval = -ESRCH; + p = find_process_by_pid(pid); + if (likely(p)) + get_task_struct(p); + rcu_read_unlock(); + + if (likely(p)) { + retval = sched_setscheduler(p, policy, &lparam); + put_task_struct(p); + } + + return retval; +} + +/* + * Mimics kernel/events/core.c perf_copy_attr(). 
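+ *
+ * In outline, the size handling implemented below is (a summary of the
+ * code that follows):
+ *
+ *	uattr->size == 0                    -> treated as SCHED_ATTR_SIZE_VER0
+ *	uattr->size <  SCHED_ATTR_SIZE_VER0 -> -E2BIG, kernel size written back
+ *	uattr->size >  PAGE_SIZE            -> -E2BIG, kernel size written back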
+ */
+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr)
+{
+	u32 size;
+	int ret;
+
+	/* Zero the full structure, so that a short copy will be nice: */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	/* ABI compatibility quirk: */
+	if (!size)
+		size = SCHED_ATTR_SIZE_VER0;
+
+	if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE)
+		goto err_size;
+
+	ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size);
+	if (ret) {
+		if (ret == -E2BIG)
+			goto err_size;
+		return ret;
+	}
+
+	/*
+	 * XXX: Do we want to be lenient like existing syscalls; or do we want
+	 * to be strict and return an error on out-of-bounds values?
+	 */
+	attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE);
+
+	/* sched/core.c uses zero here but we already know ret is zero */
+	return 0;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	return -E2BIG;
+}
+
+/**
+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority
+ * @pid: the pid in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param)
+{
+	if (policy < 0)
+		return -EINVAL;
+
+	return do_sched_setscheduler(pid, policy, param);
+}
+
+/**
+ * sys_sched_setparam - set/change the RT priority of a thread
+ * @pid: the pid in question.
+ * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
+ */
+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
+{
+	return do_sched_setscheduler(pid, SETPARAM_POLICY, param);
+}
+
+/**
+ * sys_sched_setattr - same as above, but with extended sched_attr
+ * @pid: the pid in question.
+ * @uattr: structure containing the extended parameters.
+ * @flags: for future extension.
+ */
+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
+			       unsigned int, flags)
+{
+	struct sched_attr attr;
+	struct task_struct *p;
+	int retval;
+
+	if (!uattr || pid < 0 || flags)
+		return -EINVAL;
+
+	retval = sched_copy_attr(uattr, &attr);
+	if (retval)
+		return retval;
+
+	if ((int)attr.sched_policy < 0)
+		return -EINVAL;
+
+	rcu_read_lock();
+	retval = -ESRCH;
+	p = find_process_by_pid(pid);
+	if (p != NULL)
+		retval = sched_setattr(p, &attr);
+	rcu_read_unlock();
+
+	return retval;
+}
+
+/**
+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread
+ * @pid: the pid in question.
+ *
+ * Return: On success, the policy of the thread. Otherwise, a negative error
+ * code.
+ */
+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+{
+	struct task_struct *p;
+	int retval = -EINVAL;
+
+	if (pid < 0)
+		goto out_nounlock;
+
+	retval = -ESRCH;
+	rcu_read_lock();
+	p = find_process_by_pid(pid);
+	if (p) {
+		retval = security_task_getscheduler(p);
+		if (!retval)
+			retval = p->policy;
+	}
+	rcu_read_unlock();
+
+out_nounlock:
+	return retval;
+}
+
+/**
+ * sys_sched_getparam - get the RT priority of a thread
+ * @pid: the pid in question.
+ * @param: structure containing the RT priority.
+ *
+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+ * code.
+ */ +SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) +{ + struct sched_param lp = { .sched_priority = 0 }; + struct task_struct *p; + int retval = -EINVAL; + + if (!param || pid < 0) + goto out_nounlock; + + rcu_read_lock(); + p = find_process_by_pid(pid); + retval = -ESRCH; + if (!p) + goto out_unlock; + + retval = security_task_getscheduler(p); + if (retval) + goto out_unlock; + + if (task_has_rt_policy(p)) + lp.sched_priority = p->rt_priority; + rcu_read_unlock(); + + /* + * This one might sleep, we cannot do it with a spinlock held ... + */ + retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; + +out_nounlock: + return retval; + +out_unlock: + rcu_read_unlock(); + return retval; +} + +/* + * Copy the kernel size attribute structure (which might be larger + * than what user-space knows about) to user-space. + * + * Note that all cases are valid: user-space buffer can be larger or + * smaller than the kernel-space buffer. The usual case is that both + * have the same size. + */ +static int +sched_attr_copy_to_user(struct sched_attr __user *uattr, + struct sched_attr *kattr, + unsigned int usize) +{ + unsigned int ksize = sizeof(*kattr); + + if (!access_ok(uattr, usize)) + return -EFAULT; + + /* + * sched_getattr() ABI forwards and backwards compatibility: + * + * If usize == ksize then we just copy everything to user-space and all is good. + * + * If usize < ksize then we only copy as much as user-space has space for, + * this keeps ABI compatibility as well. We skip the rest. + * + * If usize > ksize then user-space is using a newer version of the ABI, + * which part the kernel doesn't know about. Just ignore it - tooling can + * detect the kernel's knowledge of attributes from the attr->size value + * which is set to ksize in this case. + */ + kattr->size = min(usize, ksize); + + if (copy_to_user(uattr, kattr, kattr->size)) + return -EFAULT; + + return 0; +} + +/** + * sys_sched_getattr - similar to sched_getparam, but with sched_attr + * @pid: the pid in question. + * @uattr: structure containing the extended parameters. + * @usize: sizeof(attr) for fwd/bwd comp. + * @flags: for future extension. 
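+ *
+ * Sketch of the compat behaviour (sizes illustrative): an old binary
+ * built against SCHED_ATTR_SIZE_VER0 passes usize == 48; a newer kernel
+ * with a 56-byte sched_attr copies only the first 48 bytes and sets
+ * uattr->size to 48. A usize larger than the kernel's sched_attr is
+ * also fine: the extra user-space bytes are left untouched and
+ * uattr->size reports the kernel's size.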
+ */ +SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, + unsigned int, usize, unsigned int, flags) +{ + struct sched_attr kattr = { }; + struct task_struct *p; + int retval; + + if (!uattr || pid < 0 || usize > PAGE_SIZE || + usize < SCHED_ATTR_SIZE_VER0 || flags) + return -EINVAL; + + rcu_read_lock(); + p = find_process_by_pid(pid); + retval = -ESRCH; + if (!p) + goto out_unlock; + + retval = security_task_getscheduler(p); + if (retval) + goto out_unlock; + + kattr.sched_policy = p->policy; + if (p->sched_reset_on_fork) + kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; + if (task_has_rt_policy(p)) + kattr.sched_priority = p->rt_priority; + else + kattr.sched_nice = task_nice(p); + +#ifdef CONFIG_UCLAMP_TASK + kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; + kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; +#endif + + rcu_read_unlock(); + + return sched_attr_copy_to_user(uattr, &kattr, usize); + +out_unlock: + rcu_read_unlock(); + return retval; +} + +long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +{ + cpumask_var_t cpus_allowed, new_mask; + struct task_struct *p; + int retval; + + get_online_cpus(); + rcu_read_lock(); + + p = find_process_by_pid(pid); + if (!p) { + rcu_read_unlock(); + put_online_cpus(); + return -ESRCH; + } + + /* Prevent p going away */ + get_task_struct(p); + rcu_read_unlock(); + + if (p->flags & PF_NO_SETAFFINITY) { + retval = -EINVAL; + goto out_put_task; + } + if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_put_task; + } + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_free_cpus_allowed; + } + retval = -EPERM; + if (!check_same_owner(p)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); + goto out_unlock; + } + rcu_read_unlock(); + } + + retval = security_task_setscheduler(p); + if (retval) + goto out_unlock; + + cpuset_cpus_allowed(p, cpus_allowed); + cpumask_and(new_mask, in_mask, cpus_allowed); +again: + retval = __set_cpus_allowed_ptr(p, new_mask, true); + + if (!retval) { + cpuset_cpus_allowed(p, cpus_allowed); + if (!cpumask_subset(new_mask, cpus_allowed)) { + /* + * We must have raced with a concurrent cpuset + * update. Just reset the cpus_allowed to the + * cpuset's cpus_allowed + */ + cpumask_copy(new_mask, cpus_allowed); + goto again; + } + } +out_unlock: + free_cpumask_var(new_mask); +out_free_cpus_allowed: + free_cpumask_var(cpus_allowed); +out_put_task: + put_task_struct(p); + put_online_cpus(); + return retval; +} + +static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, + struct cpumask *new_mask) +{ + if (len < cpumask_size()) + cpumask_clear(new_mask); + else if (len > cpumask_size()) + len = cpumask_size(); + + return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; +} + +/** + * sys_sched_setaffinity - set the CPU affinity of a process + * @pid: pid of the process + * @len: length in bytes of the bitmask pointed to by user_mask_ptr + * @user_mask_ptr: user-space pointer to the new CPU mask + * + * Return: 0 on success. An error code otherwise. 
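+ *
+ * Illustrative user-space counterpart (not part of this patch):
+ * pinning a task to CPU 2 boils down to
+ *
+ *	cpu_set_t set;
+ *
+ *	CPU_ZERO(&set);
+ *	CPU_SET(2, &set);
+ *	sched_setaffinity(pid, sizeof(set), &set);
+ *
+ * with glibc passing the mask through to this syscall.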
+ */ +SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) +{ + cpumask_var_t new_mask; + int retval; + + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) + return -ENOMEM; + + retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); + if (retval == 0) + retval = sched_setaffinity(pid, new_mask); + free_cpumask_var(new_mask); + return retval; +} + +long sched_getaffinity(pid_t pid, cpumask_t *mask) +{ + struct task_struct *p; + raw_spinlock_t *lock; + unsigned long flags; + int retval; + + rcu_read_lock(); + + retval = -ESRCH; + p = find_process_by_pid(pid); + if (!p) + goto out_unlock; + + retval = security_task_getscheduler(p); + if (retval) + goto out_unlock; + + task_access_lock_irqsave(p, &lock, &flags); + cpumask_and(mask, &p->cpus_mask, cpu_active_mask); + task_access_unlock_irqrestore(p, lock, &flags); + +out_unlock: + rcu_read_unlock(); + + return retval; +} + +/** + * sys_sched_getaffinity - get the CPU affinity of a process + * @pid: pid of the process + * @len: length in bytes of the bitmask pointed to by user_mask_ptr + * @user_mask_ptr: user-space pointer to hold the current CPU mask + * + * Return: size of CPU mask copied to user_mask_ptr on success. An + * error code otherwise. + */ +SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) +{ + int ret; + cpumask_var_t mask; + + if ((len * BITS_PER_BYTE) < nr_cpu_ids) + return -EINVAL; + if (len & (sizeof(unsigned long)-1)) + return -EINVAL; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + ret = sched_getaffinity(pid, mask); + if (ret == 0) { + unsigned int retlen = min_t(size_t, len, cpumask_size()); + + if (copy_to_user(user_mask_ptr, mask, retlen)) + ret = -EFAULT; + else + ret = retlen; + } + free_cpumask_var(mask); + + return ret; +} + +/** + * sys_sched_yield - yield the current processor to other threads. + * + * This function yields the current CPU to other tasks. It does this by + * scheduling away the current task. If it still has the earliest deadline + * it will be scheduled again as the next task. + * + * Return: 0. + */ +static void do_sched_yield(void) +{ + struct rq *rq; + struct rq_flags rf; + + if (!sched_yield_type) + return; + + rq = this_rq_lock_irq(&rf); + + schedstat_inc(rq->yld_count); + + if (1 == sched_yield_type) { + if (!rt_task(current)) { + current->boost_prio = MAX_PRIORITY_ADJ; + requeue_task(current, rq); + } + } else if (2 == sched_yield_type) { + if (rq->nr_running > 1) + rq->skip = current; + } + + /* + * Since we are going to call schedule() anyway, there's + * no need to preempt or enable interrupts: + */ + preempt_disable(); + raw_spin_unlock(&rq->lock); + sched_preempt_enable_no_resched(); + + schedule(); +} + +SYSCALL_DEFINE0(sched_yield) +{ + do_sched_yield(); + return 0; +} + +#ifndef CONFIG_PREEMPTION +int __sched _cond_resched(void) +{ + if (should_resched(0)) { + preempt_schedule_common(); + return 1; + } + rcu_all_qs(); + return 0; +} +EXPORT_SYMBOL(_cond_resched); +#endif + +/* + * __cond_resched_lock() - if a reschedule is pending, drop the given lock, + * call schedule, and on return reacquire the lock. + * + * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level + * operations here to prevent schedule() from being called twice (once via + * spin_unlock(), once by hand). 
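+ *
+ * A typical (illustrative) use is a long walk under a spinlock:
+ *
+ *	spin_lock(&lock);
+ *	list_for_each_entry(pos, &head, node) {
+ *		process(pos);
+ *		if (cond_resched_lock(&lock))
+ *			goto restart;
+ *	}
+ *	spin_unlock(&lock);
+ *
+ * The restart is needed because a non-zero return means the lock was
+ * dropped and the list may have changed under us.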
+ */
+int __cond_resched_lock(spinlock_t *lock)
+{
+	int resched = should_resched(PREEMPT_LOCK_OFFSET);
+	int ret = 0;
+
+	lockdep_assert_held(lock);
+
+	if (spin_needbreak(lock) || resched) {
+		spin_unlock(lock);
+		if (resched)
+			preempt_schedule_common();
+		else
+			cpu_relax();
+		ret = 1;
+		spin_lock(lock);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(__cond_resched_lock);
+
+/**
+ * yield - yield the current processor to other threads.
+ *
+ * Do not ever use this function, there's a 99% chance you're doing it wrong.
+ *
+ * The scheduler is at all times free to pick the calling task as the most
+ * eligible task to run; if removing the yield() call from your code breaks
+ * it, it's already broken.
+ *
+ * Typical broken usage is:
+ *
+ * while (!event)
+ *	yield();
+ *
+ * where one assumes that yield() will let 'the other' process run that will
+ * make event true. If the current task is a SCHED_FIFO task that will never
+ * happen. Never use yield() as a progress guarantee!!
+ *
+ * If you want to use yield() to wait for something, use wait_event().
+ * If you want to use yield() to be 'nice' for others, use cond_resched().
+ * If you still want to use yield(), do not!
+ */
+void __sched yield(void)
+{
+	set_current_state(TASK_RUNNING);
+	do_sched_yield();
+}
+EXPORT_SYMBOL(yield);
+
+/**
+ * yield_to - yield the current processor to another thread in
+ * your thread group, or accelerate that thread toward the
+ * processor it's on.
+ * @p: target task
+ * @preempt: whether task preemption is allowed or not
+ *
+ * It's the caller's job to ensure that the target task struct
+ * can't go away on us before we can do any checks.
+ *
+ * In BMQ, yield_to is not supported.
+ *
+ * Return:
+ *	true (>0) if we indeed boosted the target task.
+ *	false (0) if we failed to boost the target.
+ *	-ESRCH if there's no task to yield to.
+ */
+int __sched yield_to(struct task_struct *p, bool preempt)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(yield_to);
+
+int io_schedule_prepare(void)
+{
+	int old_iowait = current->in_iowait;
+
+	current->in_iowait = 1;
+	blk_schedule_flush_plug(current);
+
+	return old_iowait;
+}
+
+void io_schedule_finish(int token)
+{
+	current->in_iowait = token;
+}
+
+/*
+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so
+ * that process accounting knows that this is a task in IO wait state.
+ *
+ * But don't do that if it is a deliberate, throttling IO wait (this task
+ * has set its backing_dev_info: the queue against which it should throttle)
+ */
+
+long __sched io_schedule_timeout(long timeout)
+{
+	int token;
+	long ret;
+
+	token = io_schedule_prepare();
+	ret = schedule_timeout(timeout);
+	io_schedule_finish(token);
+
+	return ret;
+}
+EXPORT_SYMBOL(io_schedule_timeout);
+
+void __sched io_schedule(void)
+{
+	int token;
+
+	token = io_schedule_prepare();
+	schedule();
+	io_schedule_finish(token);
+}
+EXPORT_SYMBOL(io_schedule);
+
+/**
+ * sys_sched_get_priority_max - return maximum RT priority.
+ * @policy: scheduling class.
+ *
+ * Return: On success, this syscall returns the maximum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
+ */
+SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
+{
+	int ret = -EINVAL;
+
+	switch (policy) {
+	case SCHED_FIFO:
+	case SCHED_RR:
+		ret = MAX_USER_RT_PRIO-1;
+		break;
+	case SCHED_NORMAL:
+	case SCHED_BATCH:
+	case SCHED_IDLE:
+		ret = 0;
+		break;
+	}
+	return ret;
+}
+
+/**
+ * sys_sched_get_priority_min - return minimum RT priority.
+ * @policy: scheduling class. + * + * Return: On success, this syscall returns the minimum + * rt_priority that can be used by a given scheduling class. + * On failure, a negative error code is returned. + */ +SYSCALL_DEFINE1(sched_get_priority_min, int, policy) +{ + int ret = -EINVAL; + + switch (policy) { + case SCHED_FIFO: + case SCHED_RR: + ret = 1; + break; + case SCHED_NORMAL: + case SCHED_BATCH: + case SCHED_IDLE: + ret = 0; + break; + } + return ret; +} + +static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) +{ + struct task_struct *p; + int retval; + + if (pid < 0) + return -EINVAL; + + retval = -ESRCH; + rcu_read_lock(); + p = find_process_by_pid(pid); + if (!p) + goto out_unlock; + + retval = security_task_getscheduler(p); + if (retval) + goto out_unlock; + rcu_read_unlock(); + + *t = ns_to_timespec64(sched_timeslice_ns); + return 0; + +out_unlock: + rcu_read_unlock(); + return retval; +} + +/** + * sys_sched_rr_get_interval - return the default timeslice of a process. + * @pid: pid of the process. + * @interval: userspace pointer to the timeslice value. + * + * + * Return: On success, 0 and the timeslice is in @interval. Otherwise, + * an error code. + */ +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, + struct __kernel_timespec __user *, interval) +{ + struct timespec64 t; + int retval = sched_rr_get_interval(pid, &t); + + if (retval == 0) + retval = put_timespec64(&t, interval); + + return retval; +} + +#ifdef CONFIG_COMPAT_32BIT_TIME +SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, + struct old_timespec32 __user *, interval) +{ + struct timespec64 t; + int retval = sched_rr_get_interval(pid, &t); + + if (retval == 0) + retval = put_old_timespec32(&t, interval); + return retval; +} +#endif + +void sched_show_task(struct task_struct *p) +{ + unsigned long free = 0; + int ppid; + + if (!try_get_task_stack(p)) + return; + + printk(KERN_INFO "%-15.15s %c", p->comm, task_state_to_char(p)); + + if (p->state == TASK_RUNNING) + printk(KERN_CONT " running task "); +#ifdef CONFIG_DEBUG_STACK_USAGE + free = stack_not_used(p); +#endif + ppid = 0; + rcu_read_lock(); + if (pid_alive(p)) + ppid = task_pid_nr(rcu_dereference(p->real_parent)); + rcu_read_unlock(); + printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, + task_pid_nr(p), ppid, + (unsigned long)task_thread_info(p)->flags); + + print_worker_info(KERN_INFO, p); + show_stack(p, NULL); + put_task_stack(p); +} +EXPORT_SYMBOL_GPL(sched_show_task); + +static inline bool +state_filter_match(unsigned long state_filter, struct task_struct *p) +{ + /* no filter, everything matches */ + if (!state_filter) + return true; + + /* filter, but doesn't match */ + if (!(p->state & state_filter)) + return false; + + /* + * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows + * TASK_KILLABLE). + */ + if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) + return false; + + return true; +} + + +void show_state_filter(unsigned long state_filter) +{ + struct task_struct *g, *p; + +#if BITS_PER_LONG == 32 + printk(KERN_INFO + " task PC stack pid father\n"); +#else + printk(KERN_INFO + " task PC stack pid father\n"); +#endif + rcu_read_lock(); + for_each_process_thread(g, p) { + /* + * reset the NMI-timeout, listing all files on a slow + * console might take a lot of time: + * Also, reset softlockup watchdogs on all CPUs, because + * another CPU might be blocked waiting for us to process + * an IPI. 
+ */ + touch_nmi_watchdog(); + touch_all_softlockup_watchdogs(); + if (state_filter_match(state_filter, p)) + sched_show_task(p); + } + +#ifdef CONFIG_SCHED_DEBUG + /* TODO: BMQ should support this + if (!state_filter) + sysrq_sched_debug_show(); + */ +#endif + rcu_read_unlock(); + /* + * Only show locks if all tasks are dumped: + */ + if (!state_filter) + debug_show_all_locks(); +} + +void dump_cpu_task(int cpu) +{ + pr_info("Task dump for CPU %d:\n", cpu); + sched_show_task(cpu_curr(cpu)); +} + +/** + * init_idle - set up an idle thread for a given CPU + * @idle: task in question + * @cpu: CPU the idle task belongs to + * + * NOTE: this function does not set the idle thread's NEED_RESCHED + * flag, to make booting more robust. + */ +void init_idle(struct task_struct *idle, int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + __sched_fork(0, idle); + + raw_spin_lock_irqsave(&idle->pi_lock, flags); + raw_spin_lock(&rq->lock); + update_rq_clock(rq); + + idle->last_ran = rq->clock_task; + idle->state = TASK_RUNNING; + idle->flags |= PF_IDLE; + /* Setting prio to illegal value shouldn't matter as it will never be de/enqueued */ + idle->prio = MAX_PRIO; + idle->bmq_idx = IDLE_TASK_SCHED_PRIO; + bmq_init_idle(&rq->queue, idle); + + kasan_unpoison_task_stack(idle); + +#ifdef CONFIG_SMP + /* + * It's possible that init_idle() gets called multiple times on a task, + * in that case do_set_cpus_allowed() will not do the right thing. + * + * And since this is boot we can forgo the serialisation. + */ + set_cpus_allowed_common(idle, cpumask_of(cpu)); +#endif + + /* Silence PROVE_RCU */ + rcu_read_lock(); + __set_task_cpu(idle, cpu); + rcu_read_unlock(); + + rq->idle = idle; + rcu_assign_pointer(rq->curr, idle); + idle->on_cpu = 1; + + raw_spin_unlock(&rq->lock); + raw_spin_unlock_irqrestore(&idle->pi_lock, flags); + + /* Set the preempt count _outside_ the spinlocks! */ + init_idle_preempt_count(idle, cpu); + + ftrace_graph_init_idle_task(idle, cpu); + vtime_init_idle(idle, cpu); +#ifdef CONFIG_SMP + sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); +#endif +} + +#ifdef CONFIG_SMP + +int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, + const struct cpumask __maybe_unused *trial) +{ + return 1; +} + +int task_can_attach(struct task_struct *p, + const struct cpumask *cs_cpus_allowed) +{ + int ret = 0; + + /* + * Kthreads which disallow setaffinity shouldn't be moved + * to a new cpuset; we don't want to change their CPU + * affinity and isolating such threads by their set of + * allowed nodes is unnecessary. Thus, cpusets are not + * applicable for such threads. This prevents checking for + * success of set_cpus_allowed_ptr() on all attached tasks + * before cpus_mask may be changed. + */ + if (p->flags & PF_NO_SETAFFINITY) + ret = -EINVAL; + + return ret; +} + +bool sched_smp_initialized __read_mostly; + +#ifdef CONFIG_HOTPLUG_CPU +/* + * Ensures that the idle task is using init_mm right before its CPU goes + * offline. + */ +void idle_task_exit(void) +{ + struct mm_struct *mm = current->active_mm; + + BUG_ON(cpu_online(smp_processor_id())); + + if (mm != &init_mm) { + switch_mm(mm, &init_mm, current); + current->active_mm = &init_mm; + finish_arch_post_lock_switch(); + } + mmdrop(mm); +} + +/* + * Migrate all tasks from the rq, sleeping tasks will be migrated by + * try_to_wake_up()->select_task_rq(). 
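+ *
+ * For example (illustrative): when CPU 2 is unplugged, every task still
+ * queued on its rq is walked via rq_first_bmq_task()/rq_next_bmq_task()
+ * and pushed to a destination picked by select_fallback_rq(), so the
+ * dying rq ends up holding only its idle task.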
+ *
+ * Called with rq->lock held even though we're in stop_machine() and
+ * there's no concurrency possible, we hold the required locks anyway
+ * because of lock validation efforts.
+ */
+static void migrate_tasks(struct rq *dead_rq)
+{
+	struct rq *rq = dead_rq;
+	struct task_struct *p, *stop = rq->stop;
+	int count = 0;
+
+	/*
+	 * Fudge the rq selection such that the below task selection loop
+	 * doesn't get stuck on the currently eligible stop task.
+	 *
+	 * We're currently inside stop_machine() and the rq is either stuck
+	 * in the stop_machine_cpu_stop() loop, or we're executing this code,
+	 * either way we should never end up calling schedule() until we're
+	 * done here.
+	 */
+	rq->stop = NULL;
+
+	p = rq_first_bmq_task(rq);
+	while (p != rq->idle) {
+		int dest_cpu;
+
+		/* skip the running task */
+		if (task_running(p) || 1 == p->nr_cpus_allowed) {
+			p = rq_next_bmq_task(p, rq);
+			continue;
+		}
+
+		/*
+		 * Rules for changing task_struct::cpus_allowed are holding
+		 * both pi_lock and rq->lock, such that holding either
+		 * stabilizes the mask.
+		 *
+		 * Dropping rq->lock is not quite as disastrous as it usually
+		 * is because !cpu_active at this point, which means load-balance
+		 * will not interfere. Also, stop-machine.
+		 */
+		raw_spin_unlock(&rq->lock);
+		raw_spin_lock(&p->pi_lock);
+		raw_spin_lock(&rq->lock);
+
+		/*
+		 * Since we're inside stop-machine, _nothing_ should have
+		 * changed the task, WARN if weird stuff happened, because in
+		 * that case the above rq->lock drop is a fail too.
+		 */
+		if (WARN_ON(task_rq(p) != rq || !task_on_rq_queued(p))) {
+			raw_spin_unlock(&p->pi_lock);
+			p = rq_next_bmq_task(p, rq);
+			continue;
+		}
+
+		count++;
+		/* Find suitable destination for @next, with force if needed. */
+		dest_cpu = select_fallback_rq(dead_rq->cpu, p);
+		rq = __migrate_task(rq, p, dest_cpu);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock(&p->pi_lock);
+
+		rq = dead_rq;
+		raw_spin_lock(&rq->lock);
+		/* Check the queued tasks all over again from the head */
+		p = rq_first_bmq_task(rq);
+	}
+
+	rq->stop = stop;
+}
+
+static void set_rq_offline(struct rq *rq)
+{
+	if (rq->online)
+		rq->online = false;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void set_rq_online(struct rq *rq)
+{
+	if (!rq->online)
+		rq->online = true;
+}
+
+/*
+ * used to mark begin/end of suspend/resume:
+ */
+static int num_cpus_frozen;
+
+/*
+ * Update cpusets according to cpu_active mask. If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore it back to its original state upon resume anyway.
+ */
+static void cpuset_cpu_active(void)
+{
+	if (cpuhp_tasks_frozen) {
+		/*
+		 * num_cpus_frozen tracks how many CPUs are involved in suspend
+		 * resume sequence. As long as this is not the last online
+		 * operation in the resume sequence, just build a single sched
+		 * domain, ignoring cpusets.
+		 */
+		partition_sched_domains(1, NULL, NULL);
+		if (--num_cpus_frozen)
+			return;
+		/*
+		 * This is the last CPU online operation. So fall through and
+		 * restore the original sched domains by considering the
+		 * cpuset configurations.
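+		 *
+		 * E.g. (illustrative): resuming a 4-CPU system brings CPUs
+		 * 1-3 back one by one; num_cpus_frozen drops 3 -> 2 -> 1 -> 0
+		 * and only the final online operation falls through to
+		 * rebuild the cpuset-aware domains.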
+		 */
+		cpuset_force_rebuild();
+	}
+
+	cpuset_update_active_cpus();
+}
+
+static int cpuset_cpu_inactive(unsigned int cpu)
+{
+	if (!cpuhp_tasks_frozen) {
+		cpuset_update_active_cpus();
+	} else {
+		num_cpus_frozen++;
+		partition_sched_domains(1, NULL, NULL);
+	}
+	return 0;
+}
+
+int sched_cpu_activate(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+#ifdef CONFIG_SCHED_SMT
+	/*
+	 * When going up, increment the number of cores with SMT present.
+	 */
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+		static_branch_inc_cpuslocked(&sched_smt_present);
+#endif
+	set_cpu_active(cpu, true);
+
+	if (sched_smp_initialized)
+		cpuset_cpu_active();
+
+	/*
+	 * Put the rq online, if not already. This happens:
+	 *
+	 * 1) In the early boot process, because we build the real domains
+	 *    after all cpus have been brought up.
+	 *
+	 * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
+	 *    domains.
+	 */
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	set_rq_online(rq);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	return 0;
+}
+
+int sched_cpu_deactivate(unsigned int cpu)
+{
+	int ret;
+
+	set_cpu_active(cpu, false);
+	/*
+	 * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
+	 * users of this state to go away such that all new such users will
+	 * observe it.
+	 *
+	 * Do the sync before parking smpboot threads to take care of the
+	 * RCU boost case.
+	 */
+	synchronize_rcu();
+
+#ifdef CONFIG_SCHED_SMT
+	/*
+	 * When going down, decrement the number of cores with SMT present.
+	 */
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2) {
+		static_branch_dec_cpuslocked(&sched_smt_present);
+		if (!static_branch_likely(&sched_smt_present))
+			cpumask_clear(&sched_sg_idle_mask);
+	}
+#endif
+
+	if (!sched_smp_initialized)
+		return 0;
+
+	ret = cpuset_cpu_inactive(cpu);
+	if (ret) {
+		set_cpu_active(cpu, true);
+		return ret;
+	}
+	return 0;
+}
+
+static void sched_rq_cpu_starting(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->calc_load_update = calc_load_update;
+}
+
+int sched_cpu_starting(unsigned int cpu)
+{
+	sched_rq_cpu_starting(cpu);
+	sched_tick_start(cpu);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+int sched_cpu_dying(unsigned int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long flags;
+
+	sched_tick_stop(cpu);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+	set_rq_offline(rq);
+	migrate_tasks(rq);
+	raw_spin_unlock_irqrestore(&rq->lock, flags);
+
+	hrtick_clear(rq);
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static void sched_init_topology_cpumask_early(void)
+{
+	int cpu, level;
+	cpumask_t *tmp;
+
+	for_each_possible_cpu(cpu) {
+		for (level = 0; level < NR_CPU_AFFINITY_CHK_LEVEL; level++) {
+			tmp = &(per_cpu(sched_cpu_affinity_masks, cpu)[level]);
+			cpumask_copy(tmp, cpu_possible_mask);
+			cpumask_clear_cpu(cpu, tmp);
+		}
+		per_cpu(sched_cpu_llc_mask, cpu) =
+			&(per_cpu(sched_cpu_affinity_masks, cpu)[0]);
+		per_cpu(sched_cpu_affinity_end_mask, cpu) =
+			&(per_cpu(sched_cpu_affinity_masks, cpu)[1]);
+		per_cpu(sd_llc_id, cpu) = cpu;
+	}
+}
+
+#define TOPOLOGY_CPUMASK(name, func) \
+	if (cpumask_and(chk, chk, func(cpu))) {				\
+		per_cpu(sched_cpu_llc_mask, cpu) = chk;			\
+		per_cpu(sd_llc_id, cpu) = cpumask_first(func(cpu));	\
+		printk(KERN_INFO "bmq: cpu#%d affinity mask - "#name" 0x%08lx", \
+		       cpu, (chk++)->bits[0]);				\
+	}								\
+	cpumask_complement(chk, func(cpu))
+
+static void sched_init_topology_cpumask(void)
+{
+	int cpu;
+	cpumask_t *chk;
+
+	for_each_online_cpu(cpu) {
+		chk = &(per_cpu(sched_cpu_affinity_masks, cpu)[0]);
+
+		cpumask_complement(chk, 
cpumask_of(cpu)); +#ifdef CONFIG_SCHED_SMT + TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask); +#endif +#ifdef CONFIG_SCHED_MC + TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask); +#endif + + TOPOLOGY_CPUMASK(core, topology_core_cpumask); + + if (cpumask_and(chk, chk, cpu_online_mask)) + printk(KERN_INFO "bmq: cpu#%d affinity mask - others 0x%08lx", + cpu, (chk++)->bits[0]); + + per_cpu(sched_cpu_affinity_end_mask, cpu) = chk; + printk(KERN_INFO "bmq: cpu#%d llc_id = %d, llc_mask idx = %ld\n", + cpu, per_cpu(sd_llc_id, cpu), + per_cpu(sched_cpu_llc_mask, cpu) - + &(per_cpu(sched_cpu_affinity_masks, cpu)[0])); + } +} +#endif + +void __init sched_init_smp(void) +{ + /* Move init over to a non-isolated CPU */ + if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) + BUG(); + + sched_init_topology_cpumask(); + + sched_smp_initialized = true; +} +#else +void __init sched_init_smp(void) +{ +} +#endif /* CONFIG_SMP */ + +int in_sched_functions(unsigned long addr) +{ + return in_lock_functions(addr) || + (addr >= (unsigned long)__sched_text_start + && addr < (unsigned long)__sched_text_end); +} + +#ifdef CONFIG_CGROUP_SCHED +/* task group related information */ +struct task_group { + struct cgroup_subsys_state css; + + struct rcu_head rcu; + struct list_head list; + + struct task_group *parent; + struct list_head siblings; + struct list_head children; +}; + +/* + * Default task group. + * Every task in system belongs to this group at bootup. + */ +struct task_group root_task_group; +LIST_HEAD(task_groups); + +/* Cacheline aligned slab cache for task_group */ +static struct kmem_cache *task_group_cache __read_mostly; +#endif /* CONFIG_CGROUP_SCHED */ + +void __init sched_init(void) +{ + int i; + struct rq *rq; + + print_scheduler_version(); + + wait_bit_init(); + +#ifdef CONFIG_SMP + for (i = 0; i < bmq_BITS; i++) + cpumask_copy(&sched_rq_watermark[i], cpu_present_mask); +#endif + +#ifdef CONFIG_CGROUP_SCHED + task_group_cache = KMEM_CACHE(task_group, 0); + + list_add(&root_task_group.list, &task_groups); + INIT_LIST_HEAD(&root_task_group.children); + INIT_LIST_HEAD(&root_task_group.siblings); +#endif /* CONFIG_CGROUP_SCHED */ + for_each_possible_cpu(i) { + rq = cpu_rq(i); + + bmq_init(&rq->queue); + rq->watermark = IDLE_WM; + rq->skip = NULL; + + raw_spin_lock_init(&rq->lock); + rq->nr_running = rq->nr_uninterruptible = 0; + rq->calc_load_active = 0; + rq->calc_load_update = jiffies + LOAD_FREQ; +#ifdef CONFIG_SMP + rq->online = false; + rq->cpu = i; + +#ifdef CONFIG_SCHED_SMT + rq->active_balance = 0; +#endif +#endif + rq->nr_switches = 0; + atomic_set(&rq->nr_iowait, 0); + hrtick_rq_init(rq); + } +#ifdef CONFIG_SMP + /* Set rq->online for cpu 0 */ + cpu_rq(0)->online = true; +#endif + + /* + * The boot idle thread does lazy MMU switching as well: + */ + mmgrab(&init_mm); + enter_lazy_tlb(&init_mm, current); + + /* + * Make us the idle thread. Technically, schedule() should not be + * called from this thread, however somewhere below it might be, + * but because we are the idle thread, we just pick up running again + * when this runqueue becomes "idle". 
+ */ + init_idle(current, smp_processor_id()); + + calc_load_update = jiffies + LOAD_FREQ; + +#ifdef CONFIG_SMP + idle_thread_set_boot_cpu(); + + sched_init_topology_cpumask_early(); +#endif /* SMP */ + + init_schedstats(); + + psi_init(); +} + +#ifdef CONFIG_DEBUG_ATOMIC_SLEEP +static inline int preempt_count_equals(int preempt_offset) +{ + int nested = preempt_count() + rcu_preempt_depth(); + + return (nested == preempt_offset); +} + +void __might_sleep(const char *file, int line, int preempt_offset) +{ + /* + * Blocking primitives will set (and therefore destroy) current->state, + * since we will exit with TASK_RUNNING make sure we enter with it, + * otherwise we will destroy state. + */ + WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change, + "do not call blocking ops when !TASK_RUNNING; " + "state=%lx set at [<%p>] %pS\n", + current->state, + (void *)current->task_state_change, + (void *)current->task_state_change); + + ___might_sleep(file, line, preempt_offset); +} +EXPORT_SYMBOL(__might_sleep); + +void ___might_sleep(const char *file, int line, int preempt_offset) +{ + /* Ratelimiting timestamp: */ + static unsigned long prev_jiffy; + + unsigned long preempt_disable_ip; + + /* WARN_ON_ONCE() by default, no rate limit required: */ + rcu_sleep_check(); + + if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && + !is_idle_task(current) && !current->non_block_count) || + system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || + oops_in_progress) + return; + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + + /* Save this before calling printk(), since that will clobber it: */ + preempt_disable_ip = get_preempt_disable_ip(current); + + printk(KERN_ERR + "BUG: sleeping function called from invalid context at %s:%d\n", + file, line); + printk(KERN_ERR + "in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), current->non_block_count, + current->pid, current->comm); + + if (task_stack_end_corrupted(current)) + printk(KERN_EMERG "Thread overran stack, or stack corrupted\n"); + + debug_show_held_locks(current); + if (irqs_disabled()) + print_irqtrace_events(current); +#ifdef CONFIG_DEBUG_PREEMPT + if (!preempt_count_equals(preempt_offset)) { + pr_err("Preemption disabled at:"); + print_ip_sym(preempt_disable_ip); + pr_cont("\n"); + } +#endif + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +} +EXPORT_SYMBOL(___might_sleep); + +void __cant_sleep(const char *file, int line, int preempt_offset) +{ + static unsigned long prev_jiffy; + + if (irqs_disabled()) + return; + + if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) + return; + + if (preempt_count() > preempt_offset) + return; + + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) + return; + prev_jiffy = jiffies; + + printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); + printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", + in_atomic(), irqs_disabled(), + current->pid, current->comm); + + debug_show_held_locks(current); + dump_stack(); + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); +} +EXPORT_SYMBOL_GPL(__cant_sleep); +#endif + +#ifdef CONFIG_MAGIC_SYSRQ +void normalize_rt_tasks(void) +{ + struct task_struct *g, *p; + struct sched_attr attr = { + .sched_policy = SCHED_NORMAL, + }; + + read_lock(&tasklist_lock); + for_each_process_thread(g, p) { + /* + * Only normalize user tasks: + */ + if (p->flags & PF_KTHREAD) + continue; + + if (!rt_task(p)) { + /* + * Renice 
negative nice level userspace
+			 * tasks back to 0:
+			 */
+			if (task_nice(p) < 0)
+				set_user_nice(p, 0);
+			continue;
+		}
+
+		__sched_setscheduler(p, &attr, false, false);
+	}
+	read_unlock(&tasklist_lock);
+}
+#endif /* CONFIG_MAGIC_SYSRQ */
+
+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
+/*
+ * These functions are only useful for the IA64 MCA handling, or kdb.
+ *
+ * They can only be called when the whole system has been
+ * stopped - every CPU needs to be quiescent, and no scheduling
+ * activity can take place. Using them for anything else would
+ * be a serious bug, and as a result, they aren't even visible
+ * under any other configuration.
+ */
+
+/**
+ * curr_task - return the current task for a given CPU.
+ * @cpu: the processor in question.
+ *
+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ *
+ * Return: The current task for @cpu.
+ */
+struct task_struct *curr_task(int cpu)
+{
+	return cpu_curr(cpu);
+}
+
+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */
+
+#ifdef CONFIG_IA64
+/**
+ * ia64_set_curr_task - set the current task for a given CPU.
+ * @cpu: the processor in question.
+ * @p: the task pointer to set.
+ *
+ * Description: This function must only be used when non-maskable interrupts
+ * are serviced on a separate stack. It allows the architecture to switch the
+ * notion of the current task on a CPU in a non-blocking manner. This function
+ * must be called with all CPUs synchronised and interrupts disabled; the
+ * caller must save the original value of the current task (see
+ * curr_task() above) and restore that value before reenabling interrupts and
+ * re-starting the system.
+ *
+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ */
+void ia64_set_curr_task(int cpu, struct task_struct *p)
+{
+	cpu_curr(cpu) = p;
+}
+
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
+static void sched_free_group(struct task_group *tg)
+{
+	kmem_cache_free(task_group_cache, tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(struct task_group *parent)
+{
+	struct task_group *tg;
+
+	tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
+	if (!tg)
+		return ERR_PTR(-ENOMEM);
+
+	return tg;
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+}
+
+/* rcu callback to free various structures associated with a task group */
+static void sched_free_group_rcu(struct rcu_head *rhp)
+{
+	/* Now it should be safe to free those cfs_rqs */
+	sched_free_group(container_of(rhp, struct task_group, rcu));
+}
+
+void sched_destroy_group(struct task_group *tg)
+{
+	/* Wait for possible concurrent references to cfs_rqs to complete */
+	call_rcu(&tg->rcu, sched_free_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
+{
+}
+
+static inline struct task_group *css_tg(struct cgroup_subsys_state *css)
+{
+	return css ? 
container_of(css, struct task_group, css) : NULL;
+}
+
+static struct cgroup_subsys_state *
+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct task_group *parent = css_tg(parent_css);
+	struct task_group *tg;
+
+	if (!parent) {
+		/* This is early initialization for the top cgroup */
+		return &root_task_group.css;
+	}
+
+	tg = sched_create_group(parent);
+	if (IS_ERR(tg))
+		return ERR_PTR(-ENOMEM);
+	return &tg->css;
+}
+
+/* Expose task group only after completing cgroup initialization */
+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+	struct task_group *parent = css_tg(css->parent);
+
+	if (parent)
+		sched_online_group(tg, parent);
+	return 0;
+}
+
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+
+	sched_offline_group(tg);
+}
+
+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
+}
+
+static void cpu_cgroup_fork(struct task_struct *task)
+{
+}
+
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
+{
+	return 0;
+}
+
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
+
+static struct cftype cpu_legacy_files[] = {
+	{ }	/* Terminate */
+};
+
+static struct cftype cpu_files[] = {
+	{ }	/* terminate */
+};
+
+static int cpu_extra_stat_show(struct seq_file *sf,
+			       struct cgroup_subsys_state *css)
+{
+	return 0;
+}
+
+struct cgroup_subsys cpu_cgrp_subsys = {
+	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_online	= cpu_cgroup_css_online,
+	.css_released	= cpu_cgroup_css_released,
+	.css_free	= cpu_cgroup_css_free,
+	.css_extra_stat_show	= cpu_extra_stat_show,
+	.fork		= cpu_cgroup_fork,
+	.can_attach	= cpu_cgroup_can_attach,
+	.attach		= cpu_cgroup_attach,
+	.legacy_cftypes	= cpu_legacy_files,
+	.dfl_cftypes	= cpu_files,
+	.early_init	= true,
+	.threaded	= true,
+};
+#endif	/* CONFIG_CGROUP_SCHED */
+
+#undef CREATE_TRACE_POINTS
diff --git a/kernel/sched/bmq_debug.c b/kernel/sched/bmq_debug.c
new file mode 100644
index 000000000000..375a1a805d86
--- /dev/null
+++ b/kernel/sched/bmq_debug.c
@@ -0,0 +1,31 @@
+/*
+ * kernel/sched/bmq_debug.c
+ *
+ * Print the BMQ debugging details
+ *
+ * Author: Alfred Chen
+ * Date : 2020
+ */
+#include "bmq_sched.h"
+
+/*
+ * This allows printing both to /proc/sched_debug and
+ * to the console
+ */
+#define SEQ_printf(m, x...) 
\
+	do {						\
+		if (m)					\
+			seq_printf(m, x);		\
+		else					\
+			pr_cont(x);			\
+	} while (0)
+
+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+			  struct seq_file *m)
+{
+	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
+		   get_nr_threads(p));
+}
+
+void proc_sched_set_task(struct task_struct *p)
+{}
diff --git a/kernel/sched/bmq_sched.h b/kernel/sched/bmq_sched.h
new file mode 100644
index 000000000000..449d6b54a253
--- /dev/null
+++ b/kernel/sched/bmq_sched.h
@@ -0,0 +1,509 @@
+#ifndef BMQ_SCHED_H
+#define BMQ_SCHED_H
+
+#include <linux/sched.h>
+
+#include <linux/sched/clock.h>
+#include <linux/sched/coredump.h>
+#include <linux/sched/cpufreq.h>
+#include <linux/sched/cputime.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/init.h>
+#include <linux/sched/isolation.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/nohz.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/stat.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/task.h>
+#include <linux/sched/topology.h>
+
+#include <uapi/linux/sched/types.h>
+
+#include <linux/cgroup.h>
+#include <linux/cpufreq.h>
+#include <linux/cpuidle.h>
+#include <linux/cpuset.h>
+#include <linux/ctype.h>
+#include <linux/kthread.h>
+#include <linux/membarrier.h>
+#include <linux/proc_fs.h>
+#include <linux/psi.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <linux/suspend.h>
+#include <linux/swait.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/u64_stats_sync.h>
+
+#include <asm/tlb.h>
+
+#ifdef CONFIG_PARAVIRT
+# include <asm/paravirt.h>
+#endif
+
+#include "cpupri.h"
+
+/* task_struct::on_rq states: */
+#define TASK_ON_RQ_QUEUED	1
+#define TASK_ON_RQ_MIGRATING	2
+
+static inline int task_on_rq_queued(struct task_struct *p)
+{
+	return p->on_rq == TASK_ON_RQ_QUEUED;
+}
+
+static inline int task_on_rq_migrating(struct task_struct *p)
+{
+	return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
+}
+
+/*
+ * wake flags
+ */
+#define WF_SYNC		0x01	/* waker goes to sleep after wakeup */
+#define WF_FORK		0x02	/* child wakeup after fork */
+#define WF_MIGRATED	0x04	/* internal use, task got migrated */
+
+/* bits:
+ * RT, Low prio adj range, nice width, high prio adj range, cpu idle task */
+#define bmq_BITS	(NICE_WIDTH + 2 * MAX_PRIORITY_ADJ + 2)
+#define IDLE_TASK_SCHED_PRIO	(bmq_BITS - 1)
+
+struct bmq {
+	DECLARE_BITMAP(bitmap, bmq_BITS);
+	struct list_head heads[bmq_BITS];
+};
+
+/*
+ * This is the main, per-CPU runqueue data structure.
+ * This data should only be modified by the local cpu.
+ */
+struct rq {
+	/* runqueue lock: */
+	raw_spinlock_t lock;
+
+	struct task_struct *curr, *idle, *stop, *skip;
+	struct mm_struct *prev_mm;
+
+	struct bmq queue;
+	unsigned long watermark;
+
+	/* switch count */
+	u64 nr_switches;
+
+	atomic_t nr_iowait;
+
+#ifdef CONFIG_MEMBARRIER
+	int membarrier_state;
+#endif
+
+#ifdef CONFIG_SMP
+	int cpu;		/* cpu of this runqueue */
+	bool online;
+
+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
+	struct sched_avg avg_irq;
+#endif
+
+#ifdef CONFIG_SCHED_SMT
+	int active_balance;
+	struct cpu_stop_work active_balance_work;
+#endif
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+	u64 prev_irq_time;
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#ifdef CONFIG_PARAVIRT
+	u64 prev_steal_time;
+#endif /* CONFIG_PARAVIRT */
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+	u64 prev_steal_time_rq;
+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
+
+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
+	u64 clock, last_tick;
+	u64 last_ts_switch;
+	u64 clock_task;
+
+	unsigned long nr_running;
+	unsigned long nr_uninterruptible;
+
+#ifdef CONFIG_SCHED_HRTICK
+#ifdef CONFIG_SMP
+	int hrtick_csd_pending;
+	call_single_data_t hrtick_csd;
+#endif
+	struct hrtimer hrtick_timer;
+#endif
+
+#ifdef CONFIG_SCHEDSTATS
+
+	/* latency stats */
+	struct sched_info rq_sched_info;
+	unsigned long long rq_cpu_time;
+	/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/
+
+	/* sys_sched_yield() stats */
+	unsigned int yld_count;
+
+	/* schedule() stats */
+	unsigned int sched_switch;
+	unsigned int sched_count;
+	unsigned int sched_goidle;
+
+	/* try_to_wake_up() stats */
+	unsigned int ttwu_count;
+	unsigned int ttwu_local;
+#endif /* CONFIG_SCHEDSTATS */
+#ifdef CONFIG_CPU_IDLE
+	/* Must be inspected within a rcu lock section */
+	struct cpuidle_state *idle_state;
+#endif
+};
+
+extern unsigned long calc_load_update;
+extern atomic_long_t calc_load_tasks;
+
+extern void calc_global_load_tick(struct rq *this_rq);
+extern long calc_load_fold_active(struct rq *this_rq, long adjust);
+
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
+#define this_rq()		this_cpu_ptr(&runqueues)
+#define task_rq(p)		cpu_rq(task_cpu(p))
+#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+#define raw_rq()		raw_cpu_ptr(&runqueues)
+
+#ifdef CONFIG_SMP
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+void register_sched_domain_sysctl(void);
+void unregister_sched_domain_sysctl(void);
+#else
+static inline void register_sched_domain_sysctl(void)
+{
+}
+static inline void unregister_sched_domain_sysctl(void)
+{
+}
+#endif
+
+extern bool sched_smp_initialized;
+
+enum {
+	BASE_CPU_AFFINITY_CHK_LEVEL = 1,
+#ifdef CONFIG_SCHED_SMT
+	SMT_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER,
+#endif
+#ifdef CONFIG_SCHED_MC
+	MC_CPU_AFFINITY_CHK_LEVEL_SPACE_HOLDER,
+#endif
+	NR_CPU_AFFINITY_CHK_LEVEL
+};
+
+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_CHK_LEVEL], sched_cpu_affinity_masks);
+
+static inline int __best_mask_cpu(int cpu, const cpumask_t *cpumask,
+				  const cpumask_t *mask)
+{
+	while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids)
+		mask++;
+	return cpu;
+}
+
+static inline int best_mask_cpu(int cpu, const cpumask_t *cpumask)
+{
+	return cpumask_test_cpu(cpu, cpumask) ? cpu :
+		__best_mask_cpu(cpu, cpumask, &(per_cpu(sched_cpu_affinity_masks, cpu)[0]));
+}
+
+#endif /* CONFIG_SMP */
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(int cpu)
+{
+	return SCHED_CAPACITY_SCALE;
+}
+#endif
+
+static inline u64 __rq_clock_broken(struct rq *rq)
+{
+	return READ_ONCE(rq->clock);
+}
+
+static inline u64 rq_clock(struct rq *rq)
+{
+	/*
+	 * Relax the lockdep_assert_held() check: as in VRQ, the
+	 * sched_info_xxxx() helpers may be called without rq->lock held.
+	 * lockdep_assert_held(&rq->lock);
+	 */
+	return rq->clock;
+}
+
+static inline u64 rq_clock_task(struct rq *rq)
+{
+	/*
+	 * Relax the lockdep_assert_held() check: as in VRQ, the
+	 * sched_info_xxxx() helpers may be called without rq->lock held.
+	 * lockdep_assert_held(&rq->lock);
+	 */
+	return rq->clock_task;
+}
+
+/*
+ * {de,en}queue flags:
+ *
+ * DEQUEUE_SLEEP  - task is no longer runnable
+ * ENQUEUE_WAKEUP - task just became runnable
+ *
+ */
+
+#define DEQUEUE_SLEEP		0x01
+
+#define ENQUEUE_WAKEUP		0x01
+
+
+/*
+ * The scheduler APIs below are used by other kernel code; they take a
+ * dummy rq_flags argument.
+ * TODO: BMQ needs to support these APIs for compatibility with the
+ * mainline scheduler code.
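+ *
+ * E.g. (illustrative) a caller outside the scheduler does:
+ *
+ *	struct rq_flags rf;
+ *	struct rq *rq = task_rq_lock(p, &rf);
+ *	... inspect or modify p's scheduling state ...
+ *	task_rq_unlock(rq, p, &rf);
+ *
+ * where rf.flags only carries the saved irq state for p->pi_lock.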
+ */
+struct rq_flags {
+	unsigned long flags;
+};
+
+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(rq->lock);
+
+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
+	__acquires(p->pi_lock)
+	__acquires(rq->lock);
+
+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
+	__releases(rq->lock)
+{
+	raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+	__releases(rq->lock)
+	__releases(p->pi_lock)
+{
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
+}
+
+static inline void
+rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
+	__releases(rq->lock)
+{
+	raw_spin_unlock_irq(&rq->lock);
+}
+
+static inline struct rq *
+this_rq_lock_irq(struct rq_flags *rf)
+	__acquires(rq->lock)
+{
+	struct rq *rq;
+
+	local_irq_disable();
+	rq = this_rq();
+	raw_spin_lock(&rq->lock);
+
+	return rq;
+}
+
+static inline bool task_running(struct task_struct *p)
+{
+	return p->on_cpu;
+}
+
+extern struct static_key_false sched_schedstats;
+
+static inline void sched_ttwu_pending(void) { }
+
+#ifdef CONFIG_CPU_IDLE
+static inline void idle_set_state(struct rq *rq,
+				  struct cpuidle_state *idle_state)
+{
+	rq->idle_state = idle_state;
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+	WARN_ON(!rcu_read_lock_held());
+	return rq->idle_state;
+}
+#else
+static inline void idle_set_state(struct rq *rq,
+				  struct cpuidle_state *idle_state)
+{
+}
+
+static inline struct cpuidle_state *idle_get_state(struct rq *rq)
+{
+	return NULL;
+}
+#endif
+
+static inline int cpu_of(const struct rq *rq)
+{
+#ifdef CONFIG_SMP
+	return rq->cpu;
+#else
+	return 0;
+#endif
+}
+
+#include "stats.h"
+
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+struct irqtime {
+	u64			total;
+	u64			tick_delta;
+	u64			irq_start_time;
+	struct u64_stats_sync	sync;
+};
+
+DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
+
+/*
+ * Returns the irqtime minus the softirq time computed by ksoftirqd.
+ * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
+ * subtracted and would never move forward.
+ */
+static inline u64 irq_time_read(int cpu)
+{
+	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+	unsigned int seq;
+	u64 total;
+
+	do {
+		seq = __u64_stats_fetch_begin(&irqtime->sync);
+		total = irqtime->total;
+	} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+
+	return total;
+}
+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+
+#ifdef CONFIG_CPU_FREQ
+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
+
+/**
+ * cpufreq_update_util - Take a note about CPU utilization changes.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
+ *
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
+ *
+ * It can only be called from RCU-sched read-side critical sections.
+ *
+ * The way cpufreq is currently arranged requires it to evaluate the CPU
+ * performance state (frequency/voltage) on a regular basis to prevent it from
+ * being stuck in a completely inadequate performance level for too long.
+ * That is not guaranteed to happen if the updates are only triggered from CFS
+ * and DL, though, because they may not be coming in if only RT tasks are
+ * active all the time (or there are RT tasks only).
+ * + * As a workaround for that issue, this function is called periodically by the + * RT sched class to trigger extra cpufreq updates to prevent it from stalling, + * but that really is a band-aid. Going forward it should be replaced with + * solutions targeted more specifically at RT tasks. + */ +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) +{ + struct update_util_data *data; + + data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + if (data) + data->func(data, rq_clock(rq), flags); +} +#else +static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} +#endif /* CONFIG_CPU_FREQ */ + +#ifdef CONFIG_NO_HZ_FULL +extern int __init sched_tick_offload_init(void); +#else +static inline int sched_tick_offload_init(void) { return 0; } +#endif + +#ifdef arch_scale_freq_capacity +#ifndef arch_scale_freq_invariant +#define arch_scale_freq_invariant() (true) +#endif +#else /* arch_scale_freq_capacity */ +#define arch_scale_freq_invariant() (false) +#endif + +extern void schedule_idle(void); + +/* + * !! For sched_setattr_nocheck() (kernel) only !! + * + * This is actually gross. :( + * + * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE + * tasks, but still be able to sleep. We need this on platforms that cannot + * atomically change clock frequency. Remove once fast switching will be + * available on such platforms. + * + * SUGOV stands for SchedUtil GOVernor. + */ +#define SCHED_FLAG_SUGOV 0x10000000 + +#ifdef CONFIG_MEMBARRIER +/* + * The scheduler provides memory barriers required by membarrier between: + * - prior user-space memory accesses and store to rq->membarrier_state, + * - store to rq->membarrier_state and following user-space memory accesses. + * In the same way it provides those guarantees around store to rq->curr. + */ +static inline void membarrier_switch_mm(struct rq *rq, + struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ + int membarrier_state; + + if (prev_mm == next_mm) + return; + + membarrier_state = atomic_read(&next_mm->membarrier_state); + if (READ_ONCE(rq->membarrier_state) == membarrier_state) + return; + + WRITE_ONCE(rq->membarrier_state, membarrier_state); +} +#else +static inline void membarrier_switch_mm(struct rq *rq, + struct mm_struct *prev_mm, + struct mm_struct *next_mm) +{ +} +#endif + +static inline int task_running_nice(struct task_struct *p) +{ + return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); +} + +#ifdef CONFIG_NUMA +extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); +#else +static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) +{ + return nr_cpu_ids; +} +#endif +#endif /* BMQ_SCHED_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 90e4b00ace89..9e7768dbd92d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1253,7 +1253,8 @@ static void __init init_uclamp(void) mutex_init(&uclamp_mutex); for_each_possible_cpu(cpu) { - memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq)); + memset(&cpu_rq(cpu)->uclamp, 0, + sizeof(struct uclamp_rq)*UCLAMP_CNT); cpu_rq(cpu)->uclamp_flags = 0; } @@ -3668,28 +3669,32 @@ static void sched_tick_remote(struct work_struct *work) * statistics and checks timeslices in a time-independent way, regardless * of when exactly it is running. 
*/ - if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) + if (!tick_nohz_tick_stopped_cpu(cpu)) goto out_requeue; rq_lock_irq(rq, &rf); curr = rq->curr; - if (is_idle_task(curr) || cpu_is_offline(cpu)) + if (cpu_is_offline(cpu)) goto out_unlock; + curr = rq->curr; update_rq_clock(rq); - delta = rq_clock_task(rq) - curr->se.exec_start; - /* - * Make sure the next tick runs within a reasonable - * amount of time. - */ - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + if (!is_idle_task(curr)) { + /* + * Make sure the next tick runs within a reasonable + * amount of time. + */ + delta = rq_clock_task(rq) - curr->se.exec_start; + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + } curr->sched_class->task_tick(rq, curr, 0); + calc_load_nohz_remote(rq); out_unlock: rq_unlock_irq(rq, &rf); - out_requeue: + /* * Run the remote tick once per second (1Hz). This arbitrary * frequency is large enough to avoid overload but short enough @@ -7063,8 +7068,15 @@ void sched_move_task(struct task_struct *tsk) if (queued) enqueue_task(rq, tsk, queue_flags); - if (running) + if (running) { set_next_task(rq, tsk); + /* + * After changing group, the running task may have joined a + * throttled one but it's still the running task. Trigger a + * resched to make sure that task can still run. + */ + resched_curr(rq); + } task_rq_unlock(rq, tsk, &rf); } @@ -7100,6 +7112,12 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) if (parent) sched_online_group(tg, parent); + +#ifdef CONFIG_UCLAMP_TASK_GROUP + /* Propagate the effective uclamp value for the new group */ + cpu_util_update_eff(css); +#endif + return 0; } @@ -7254,7 +7272,7 @@ capacity_from_percent(char *buf) &req.percent); if (req.ret) return req; - if (req.percent > UCLAMP_PERCENT_SCALE) { + if ((u64)req.percent > UCLAMP_PERCENT_SCALE) { req.ret = -ERANGE; return req; } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 9b8916fd00a2..9073fba046c8 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -183,6 +183,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, return cpufreq_driver_resolve_freq(policy, freq); } +#ifndef CONFIG_SCHED_BMQ /* * This function computes an effective utilization for the given CPU, to be * used for frequency selection given the linear relation: f = u * f_max. @@ -300,6 +301,13 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) return schedutil_cpu_util(sg_cpu->cpu, util, max, FREQUENCY_UTIL, NULL); } +#else /* CONFIG_SCHED_BMQ */ +static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) +{ + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); + return sg_cpu->max; +} +#endif /** * sugov_iowait_reset() - Reset the IO boost status of a CPU. 
@@ -443,7 +451,9 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } */ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy) { +#ifndef CONFIG_SCHED_BMQ if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) +#endif sg_policy->limits_changed = true; } @@ -686,6 +696,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) } ret = sched_setattr_nocheck(thread, &attr); + if (ret) { kthread_stop(thread); pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); @@ -916,6 +927,7 @@ static int __init sugov_register(void) core_initcall(sugov_register); #ifdef CONFIG_ENERGY_MODEL +#ifndef CONFIG_SCHED_BMQ extern bool sched_energy_update; extern struct mutex sched_energy_mutex; @@ -946,4 +958,10 @@ void sched_cpufreq_governor_change(struct cpufreq_policy *policy, } } +#else /* CONFIG_SCHED_BMQ */ +void sched_cpufreq_governor_change(struct cpufreq_policy *policy, + struct cpufreq_governor *old_gov) +{ +} +#endif #endif diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index d43318a489f2..1a312bb6f4a1 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) p->utime += cputime; account_group_user_time(p, cputime); - index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; + index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; /* Add user time to cpustat. */ task_group_account_field(p, index, cputime); @@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) p->gtime += cputime; /* Add guest time to cpustat. */ - if (task_nice(p) > 0) { + if (task_running_nice(p)) { cpustat[CPUTIME_NICE] += cputime; cpustat[CPUTIME_GUEST_NICE] += cputime; } else { @@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) #ifdef CONFIG_64BIT static inline u64 read_sum_exec_runtime(struct task_struct *t) { - return t->se.sum_exec_runtime; + return tsk_seruntime(t); } #else static u64 read_sum_exec_runtime(struct task_struct *t) @@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) struct rq *rq; rq = task_rq_lock(t, &rf); - ns = t->se.sum_exec_runtime; + ns = tsk_seruntime(t); task_rq_unlock(rq, t, &rf); return ns; @@ -661,7 +661,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime = { - .sum_exec_runtime = p->se.sum_exec_runtime, + .sum_exec_runtime = tsk_seruntime(p), }; task_cputime(p, &cputime.utime, &cputime.stime); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ba749f579714..0ff2f43ac9cd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5828,6 +5828,7 @@ static inline int select_idle_smt(struct task_struct *p, int target) */ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target) { + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); struct sched_domain *this_sd; u64 avg_cost, avg_idle; u64 time, cost; @@ -5859,11 +5860,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t time = cpu_clock(this); - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); + + for_each_cpu_wrap(cpu, cpus, target) { if (!--nr) return si_cpu; - if (!cpumask_test_cpu(cpu, p->cpus_ptr)) - continue; if (available_idle_cpu(cpu)) break; if (si_cpu == -1 && sched_idle_cpu(cpu)) @@ -8314,6 +8315,8 @@ static inline void 
update_sg_wakeup_stats(struct sched_domain *sd, sgs->group_capacity = group->sgc->capacity; + sgs->group_weight = group->group_weight; + sgs->group_type = group_classify(sd->imbalance_pct, group, sgs); /* diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index ffa959e91227..469f36c89a9d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -361,6 +361,7 @@ void cpu_startup_entry(enum cpuhp_state state) do_idle(); } +#ifndef CONFIG_SCHED_BMQ /* * idle-task scheduling class. */ @@ -481,3 +482,4 @@ const struct sched_class idle_sched_class = { .switched_to = switched_to_idle, .update_curr = update_curr_idle, }; +#endif diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 28a516575c18..de22da666ac7 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void) return calc_load_idx & 1; } -void calc_load_nohz_start(void) +static void calc_load_nohz_fold(struct rq *rq) { - struct rq *this_rq = this_rq(); long delta; - /* - * We're going into NO_HZ mode, if there's any pending delta, fold it - * into the pending NO_HZ delta. - */ - delta = calc_load_fold_active(this_rq, 0); + delta = calc_load_fold_active(rq, 0); if (delta) { int idx = calc_load_write_idx(); @@ -248,6 +243,24 @@ void calc_load_nohz_start(void) } } +void calc_load_nohz_start(void) +{ + /* + * We're going into NO_HZ mode, if there's any pending delta, fold it + * into the pending NO_HZ delta. + */ + calc_load_nohz_fold(this_rq()); +} + +/* + * Keep track of the load for NOHZ_FULL, must be called between + * calc_load_nohz_{start,stop}(). + */ +void calc_load_nohz_remote(struct rq *rq) +{ + calc_load_nohz_fold(rq); +} + void calc_load_nohz_stop(void) { struct rq *this_rq = this_rq(); @@ -268,7 +281,7 @@ void calc_load_nohz_stop(void) this_rq->calc_load_update += LOAD_FREQ; } -static long calc_load_nohz_fold(void) +static long calc_load_nohz_read(void) { int idx = calc_load_read_idx(); long delta = 0; @@ -323,7 +336,7 @@ static void calc_global_nohz(void) } #else /* !CONFIG_NO_HZ_COMMON */ -static inline long calc_load_nohz_fold(void) { return 0; } +static inline long calc_load_nohz_read(void) { return 0; } static inline void calc_global_nohz(void) { } #endif /* CONFIG_NO_HZ_COMMON */ @@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks) /* * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs. 
*/ - delta = calc_load_nohz_fold(); + delta = calc_load_nohz_read(); if (delta) atomic_long_add(delta, &calc_load_tasks); diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index a96db50d40e0..22c20e28b613 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -236,6 +236,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna WRITE_ONCE(sa->util_avg, sa->util_sum / divider); } +#ifndef CONFIG_SCHED_BMQ /* * sched_entity: * @@ -352,6 +353,7 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) return 0; } +#endif #ifdef CONFIG_HAVE_SCHED_AVG_IRQ /* diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index afff644da065..4da52afaeff8 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -1,11 +1,13 @@ #ifdef CONFIG_SMP #include "sched-pelt.h" +#ifndef CONFIG_SCHED_BMQ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); +#endif #ifdef CONFIG_HAVE_SCHED_AVG_IRQ int update_irq_load_avg(struct rq *rq, u64 running); @@ -17,6 +19,7 @@ update_irq_load_avg(struct rq *rq, u64 running) } #endif +#ifndef CONFIG_SCHED_BMQ /* * When a task is dequeued, its estimated utilization should not be update if * its util_avg has not been updated at least once. @@ -137,9 +140,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) return rq_clock_pelt(rq_of(cfs_rq)); } #endif +#endif /* CONFIG_SCHED_BMQ */ #else +#ifndef CONFIG_SCHED_BMQ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) { @@ -157,6 +162,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) { return 0; } +#endif static inline int update_irq_load_avg(struct rq *rq, u64 running) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index ce8f6748678a..9154e745f097 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1199,6 +1199,9 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; + if (!nbytes) + return -EINVAL; + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 280a3c735935..db07c37806bc 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2,6 +2,10 @@ /* * Scheduler internal types and methods: */ +#ifdef CONFIG_SCHED_BMQ +#include "bmq_sched.h" +#else + #include #include @@ -2487,3 +2491,9 @@ static inline void membarrier_switch_mm(struct rq *rq, { } #endif + +static inline int task_running_nice(struct task_struct *p) +{ + return (task_nice(p) > 0); +} +#endif /* !CONFIG_SCHED_BMQ */ diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 750fb3c67eed..0cc040a28d3f 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -22,8 +22,10 @@ static int show_schedstat(struct seq_file *seq, void *v) } else { struct rq *rq; #ifdef CONFIG_SMP +#ifndef CONFIG_SCHED_BMQ struct sched_domain *sd; int dcount = 0; +#endif #endif cpu = (unsigned long)(v - 2); rq = cpu_rq(cpu); @@ -40,6 +42,7 @@ static int show_schedstat(struct seq_file *seq, void *v) seq_printf(seq, "\n"); #ifdef CONFIG_SMP +#ifndef CONFIG_SCHED_BMQ /* domain-specific stats */ rcu_read_lock(); for_each_domain(cpu, sd) { @@ -68,6 +71,7 @@ static int show_schedstat(struct seq_file 
*seq, void *v) sd->ttwu_move_balance); } rcu_read_unlock(); +#endif #endif } return 0; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 6ec1e595b1d4..16f6a8c84cfc 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -4,6 +4,7 @@ */ #include "sched.h" +#ifndef CONFIG_SCHED_BMQ DEFINE_MUTEX(sched_domains_mutex); /* Protected by sched_domains_mutex: */ @@ -1182,8 +1183,10 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) */ static int default_relax_domain_level = -1; +#endif /* CONFIG_SCHED_BMQ */ int sched_domain_level_max; +#ifndef CONFIG_SCHED_BMQ static int __init setup_relax_domain_level(char *str) { if (kstrtoint(str, 0, &default_relax_domain_level)) @@ -1425,6 +1428,7 @@ sd_init(struct sched_domain_topology_level *tl, return sd; } +#endif /* CONFIG_SCHED_BMQ */ /* * Topology list, bottom-up. @@ -1454,6 +1458,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) sched_domain_topology = tl; } +#ifndef CONFIG_SCHED_BMQ #ifdef CONFIG_NUMA static const struct cpumask *sd_numa_mask(int cpu) @@ -1879,6 +1884,42 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve return sd; } +/* + * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for + * any two given CPUs at this (non-NUMA) topology level. + */ +static bool topology_span_sane(struct sched_domain_topology_level *tl, + const struct cpumask *cpu_map, int cpu) +{ + int i; + + /* NUMA levels are allowed to overlap */ + if (tl->flags & SDTL_OVERLAP) + return true; + + /* + * Non-NUMA levels cannot partially overlap - they must be either + * completely equal or completely disjoint. Otherwise we can end up + * breaking the sched_group lists - i.e. a later get_group() pass + * breaks the linking done for an earlier span. + */ + for_each_cpu(i, cpu_map) { + if (i == cpu) + continue; + /* + * We should 'and' all those masks with 'cpu_map' to exactly + * match the topology we're about to build, but that can only + * remove CPUs, which only lessens our ability to detect + * overlaps + */ + if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) && + cpumask_intersects(tl->mask(cpu), tl->mask(i))) + return false; + } + + return true; +} + /* * Find the sched_domain_topology_level where all CPU capacities are visible * for all CPUs. @@ -1975,6 +2016,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att has_asym = true; } + if (WARN_ON(!topology_span_sane(tl, cpu_map, i))) + goto error; + sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i); if (tl == sched_domain_topology) @@ -2289,3 +2333,17 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); mutex_unlock(&sched_domains_mutex); } +#else /* CONFIG_SCHED_BMQ */ +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], + struct sched_domain_attr *dattr_new) +{} + +#ifdef CONFIG_NUMA +int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE; + +int sched_numa_find_closest(const struct cpumask *cpus, int cpu) +{ + return best_mask_cpu(cpu, cpus); +} +#endif /* CONFIG_NUMA */ +#endif diff --git a/kernel/signal.c b/kernel/signal.c index bcd46f547db3..eea748174ade 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -413,27 +413,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending; /* * Protect access to @t credentials. 
This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. */ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock(); - if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -447,8 +452,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 70665934d53e..6d56b110fa2e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -110,6 +110,9 @@ extern int core_uses_pid; extern char core_pattern[]; extern unsigned int core_pipe_limit; #endif +#ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; +#endif extern int pid_max; extern int pid_max_min, pid_max_max; extern int percpu_pagelist_fraction; @@ -132,6 +135,10 @@ static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; static int one_thousand = 1000; +#ifdef CONFIG_SCHED_BMQ +static int __maybe_unused zero = 0; +extern int sched_yield_type; +#endif #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif @@ -300,7 +307,7 @@ static struct ctl_table sysctl_base_table[] = { { } }; -#ifdef CONFIG_SCHED_DEBUG +#if defined(CONFIG_SCHED_DEBUG) && !defined(CONFIG_SCHED_BMQ) static int min_sched_granularity_ns = 100000; /* 100 usecs */ static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */ static int min_wakeup_granularity_ns; /* 0 usecs */ @@ -317,6 +324,7 @@ static int max_extfrag_threshold = 1000; #endif static struct ctl_table kern_table[] = { +#ifndef CONFIG_SCHED_BMQ { .procname = "sched_child_runs_first", .data = &sysctl_sched_child_runs_first, @@ -498,6 +506,7 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_ONE, }, #endif +#endif /* !CONFIG_SCHED_BMQ */ #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", @@ -546,6 +555,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -1070,6 +1088,17 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_SCHED_BMQ + { + .procname = "yield_type", + .data = &sched_yield_type, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &two, + }, +#endif #if defined(CONFIG_S390) && defined(CONFIG_SMP) { .procname = "spin_retry", diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 451f9d05ccfe..b97401f6bc23 100644 --- a/kernel/time/alarmtimer.c +++ 
b/kernel/time/alarmtimer.c @@ -88,6 +88,8 @@ static int alarmtimer_rtc_add_device(struct device *dev, unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct wakeup_source *__ws; + struct platform_device *pdev; + int ret = 0; if (rtcdev) return -EBUSY; @@ -98,12 +100,14 @@ static int alarmtimer_rtc_add_device(struct device *dev, return -1; __ws = wakeup_source_register(dev, "alarmtimer"); + pdev = platform_device_register_data(dev, "alarmtimer", + PLATFORM_DEVID_AUTO, NULL, 0); spin_lock_irqsave(&rtcdev_lock, flags); - if (!rtcdev) { + if (__ws && !IS_ERR(pdev) && !rtcdev) { if (!try_module_get(rtc->owner)) { - spin_unlock_irqrestore(&rtcdev_lock, flags); - return -1; + ret = -1; + goto unlock; } rtcdev = rtc; @@ -111,12 +115,17 @@ static int alarmtimer_rtc_add_device(struct device *dev, get_device(dev); ws = __ws; __ws = NULL; + pdev = NULL; + } else { + ret = -1; } +unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); + platform_device_unregister(pdev); wakeup_source_unregister(__ws); - return 0; + return ret; } static inline void alarmtimer_rtc_timer_init(void) @@ -874,8 +883,7 @@ static struct platform_driver alarmtimer_driver = { */ static int __init alarmtimer_init(void) { - struct platform_device *pdev; - int error = 0; + int error; int i; alarmtimer_rtc_timer_init(); @@ -898,15 +906,7 @@ static int __init alarmtimer_init(void) if (error) goto out_if; - pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0); - if (IS_ERR(pdev)) { - error = PTR_ERR(pdev); - goto out_drv; - } return 0; - -out_drv: - platform_driver_unregister(&alarmtimer_driver); out_if: alarmtimer_rtc_interface_remove(); return error; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index fff5f64981c6..428beb69426a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -293,8 +293,15 @@ static void clocksource_watchdog(struct timer_list *unused) next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); + + /* + * Arm timer if not already pending: could race with concurrent + * pair clocksource_stop_watchdog() clocksource_start_watchdog(). 
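+	 * A concurrent stop/start cycle can re-arm the timer on another
+	 * CPU while this callback is still running; calling add_timer_on()
+	 * again on an already-pending timer would trip its BUG_ON(), hence
+	 * the timer_pending() check.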
+ */ + if (!timer_pending(&watchdog_timer)) { + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); + } out: spin_unlock(&watchdog_lock); } diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 42d512fcfda2..70b97fe0ff44 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -226,7 +226,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) u64 stime, utime; task_cputime(p, &utime, &stime); - store_samples(samples, stime, utime, p->se.sum_exec_runtime); + store_samples(samples, stime, utime, tsk_seruntime(p)); } static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, @@ -796,6 +796,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, } } +#ifndef CONFIG_SCHED_BMQ static inline void check_dl_overrun(struct task_struct *tsk) { if (tsk->dl.dl_overrun) { @@ -803,6 +804,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } +#endif static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard) { @@ -830,8 +832,10 @@ static void check_thread_timers(struct task_struct *tsk, u64 samples[CPUCLOCK_MAX]; unsigned long soft; +#ifndef CONFIG_SCHED_BMQ if (dl_task(tsk)) check_dl_overrun(tsk); +#endif if (expiry_cache_is_inactive(pct)) return; @@ -845,7 +849,7 @@ static void check_thread_timers(struct task_struct *tsk, soft = task_rlimit(tsk, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { /* Task RT timeout is accounted in jiffies. RTTIME is usec */ - unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); + unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); /* At the hard limit, send SIGKILL. No further action. */ @@ -1099,8 +1103,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) return true; } +#ifndef CONFIG_SCHED_BMQ if (dl_task(tsk) && tsk->dl.dl_overrun) return true; +#endif return false; } diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c index 5ee0f7709410..9577c89179cd 100644 --- a/kernel/time/vsyscall.c +++ b/kernel/time/vsyscall.c @@ -28,11 +28,6 @@ static inline void update_vdso_data(struct vdso_data *vdata, vdata[CS_RAW].mult = tk->tkr_raw.mult; vdata[CS_RAW].shift = tk->tkr_raw.shift; - /* CLOCK_REALTIME */ - vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; - vdso_ts->sec = tk->xtime_sec; - vdso_ts->nsec = tk->tkr_mono.xtime_nsec; - /* CLOCK_MONOTONIC */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; @@ -70,12 +65,6 @@ static inline void update_vdso_data(struct vdso_data *vdata, vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; vdso_ts->nsec = tk->tkr_mono.xtime_nsec; - - /* - * Read without the seqlock held by clock_getres(). - * Note: No need to have a second copy. - */ - WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); } void update_vsyscall(struct timekeeper *tk) @@ -84,20 +73,17 @@ void update_vsyscall(struct timekeeper *tk) struct vdso_timestamp *vdso_ts; u64 nsec; - if (__arch_update_vdso_data()) { - /* - * Some architectures might want to skip the update of the - * data page. 
- */ - return; - } - /* copy vsyscall data */ vdso_write_begin(vdata); vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk); vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk); + /* CLOCK_REALTIME also required for time() */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; + vdso_ts->sec = tk->xtime_sec; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; + /* CLOCK_REALTIME_COARSE */ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; vdso_ts->sec = tk->xtime_sec; @@ -110,7 +96,18 @@ void update_vsyscall(struct timekeeper *tk) nsec = nsec + tk->wall_to_monotonic.tv_nsec; vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec); - update_vdso_data(vdata, tk); + /* + * Read without the seqlock held by clock_getres(). + * Note: No need to have a second copy. + */ + WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); + + /* + * Architectures can opt out of updating the high resolution part + * of the VDSO. + */ + if (__arch_update_vdso_data()) + update_vdso_data(vdata, tk); __arch_update_vsyscall(vdata, tk); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 475e29498bca..840ef7af20e0 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -335,6 +335,7 @@ static void put_probe_ref(void) static void blk_trace_cleanup(struct blk_trace *bt) { + synchronize_rcu(); blk_trace_free(bt); put_probe_ref(); } @@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, static int __blk_trace_startstop(struct request_queue *q, int start) { int ret; - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (bt == NULL) return -EINVAL; @@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) void blk_trace_shutdown(struct request_queue *q) { mutex_lock(&q->blk_trace_mutex); - - if (q->blk_trace) { + if (rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex))) { __blk_trace_startstop(q, 0); __blk_trace_remove(q); } @@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q) #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + /* We don't use the 'bt' value here except as an optimization... 
*/ + bt = rcu_dereference_protected(q->blk_trace, 1); if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; @@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what, u64 cgid) { - struct blk_trace *bt = rq->q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(rq->q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } if (blk_rq_is_passthrough(rq)) what |= BLK_TC_ACT(BLK_TC_PC); @@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error, __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), rq->cmd_flags, what, error, 0, NULL, cgid); + rcu_read_unlock(); } static void blk_add_trace_rq_insert(void *ignore, @@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what, int error) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } static void blk_add_trace_bio_bounce(void *ignore, @@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } @@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore, if (bio) blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); else { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, 0, 0, NULL, 0); + rcu_read_unlock(); } } static void blk_add_trace_plug(void *ignore, struct request_queue *q) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + rcu_read_unlock(); } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(depth); u32 what; @@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } + rcu_read_unlock(); } static void blk_add_trace_split(void *ignore, struct request_queue *q, struct bio *bio, unsigned int pdu) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(pdu); @@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore, BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); } + rcu_read_unlock(); } /** @@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore, struct request_queue *q, struct bio *bio, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; 
struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(bio_dev(bio)); @@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); + rcu_read_unlock(); } /** @@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; struct blk_io_trace_remap r; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); @@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore, __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } /** @@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len) { - struct blk_trace *bt = q->blk_trace; + struct blk_trace *bt; - if (likely(!bt)) + rcu_read_lock(); + bt = rcu_dereference(q->blk_trace); + if (likely(!bt)) { + rcu_read_unlock(); return; + } __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(q, rq)); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q) return -EINVAL; put_probe_ref(); + synchronize_rcu(); blk_trace_free(bt); return 0; } @@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q; struct block_device *bdev; + struct blk_trace *bt; ssize_t ret = -ENXIO; bdev = bdget(part_devt(p)); @@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == &dev_attr_enable) { - ret = sprintf(buf, "%u\n", !!q->blk_trace); + ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; } - if (q->blk_trace == NULL) + if (bt == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) - ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); + ret = blk_trace_mask2str(buf, bt->act_mask); else if (attr == &dev_attr_pid) - ret = sprintf(buf, "%u\n", q->blk_trace->pid); + ret = sprintf(buf, "%u\n", bt->pid); else if (attr == &dev_attr_start_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); + ret = sprintf(buf, "%llu\n", bt->start_lba); else if (attr == &dev_attr_end_lba) - ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); + ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: mutex_unlock(&q->blk_trace_mutex); @@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct block_device *bdev; struct request_queue *q; struct hd_struct *p; + struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; @@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, mutex_lock(&q->blk_trace_mutex); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); if (attr == 
&dev_attr_enable) { - if (!!value == !!q->blk_trace) { + if (!!value == !!bt) { ret = 0; goto out_unlock_bdev; } @@ -1844,18 +1896,21 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, } ret = 0; - if (q->blk_trace == NULL) + if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); + bt = rcu_dereference_protected(q->blk_trace, + lockdep_is_held(&q->blk_trace_mutex)); + } if (ret == 0) { if (attr == &dev_attr_act_mask) - q->blk_trace->act_mask = value; + bt->act_mask = value; else if (attr == &dev_attr_pid) - q->blk_trace->pid = value; + bt->pid = value; else if (attr == &dev_attr_start_lba) - q->blk_trace->start_lba = value; + bt->start_lba = value; else if (attr == &dev_attr_end_lba) - q->blk_trace->end_lba = value; + bt->end_lba = value; } out_unlock_bdev: diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9bf1f2cd515e..0f5b9834b8f6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1552,6 +1552,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) rec = bsearch(&key, pg->records, pg->index, sizeof(struct dyn_ftrace), ftrace_cmp_recs); + if (rec) + break; } return rec; } @@ -5596,8 +5598,8 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, @@ -5872,8 +5874,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_rcu(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } @@ -7031,9 +7040,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b6ee4aadc26..256ac508196f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1827,6 +1827,7 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); + tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { /* This loop can take minutes when sanitizers are enabled, so * lets make sure we allow RCU processing. 
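The blktrace conversion above follows the standard pattern for an RCU-managed pointer with a mutex-protected writer side: fast-path readers (the tracepoint hooks) bracket the access with rcu_read_lock()/rcu_read_unlock() and use rcu_dereference(), sysfs/ioctl paths holding q->blk_trace_mutex use rcu_dereference_protected() with a lockdep expression, and teardown inserts synchronize_rcu() before freeing. A condensed sketch of the reader side (example_trace_hook and do_emit_trace_record are placeholders, not real blktrace symbols):

	static void example_trace_hook(struct request_queue *q)
	{
		struct blk_trace *bt;

		rcu_read_lock();
		bt = rcu_dereference(q->blk_trace);
		if (likely(!bt)) {
			rcu_read_unlock();
			return;
		}
		/* bt cannot be freed until rcu_read_unlock(), because the
		 * writer calls synchronize_rcu() before blk_trace_free().
		 */
		do_emit_trace_record(bt);
		rcu_read_unlock();
	}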
@@ -1849,6 +1850,7 @@ static __init int init_trace_selftests(void) list_del(&p->list); kfree(p); } + tracing_selftest_running = false; out: mutex_unlock(&trace_types_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 63bf60f79398..85f475bb4823 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -950,22 +950,31 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions @@ -1001,10 +1010,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6ac35b9e195d..862fb6d16edb 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -470,11 +470,12 @@ struct action_data { * When a histogram trigger is hit, the values of any * references to variables, including variables being passed * as parameters to synthetic events, are collected into a - * var_ref_vals array. This var_ref_idx is the index of the - * first param in the array to be passed to the synthetic - * event invocation. + * var_ref_vals array. This var_ref_idx array is an array of + * indices into the var_ref_vals array, one for each synthetic + * event param, and is passed to the synthetic event + * invocation. 
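+ * For example, a synthetic event with two params whose references
+ * landed at hist_data->var_refs[3] and var_refs[0] gets
+ * var_ref_idx = {3, 0}, and the probe reads var_ref_vals[3]
+ * followed by var_ref_vals[0].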
*/ - unsigned int var_ref_idx; + unsigned int var_ref_idx[TRACING_MAP_VARS_MAX]; struct synth_event *synth_event; bool use_trace_keyword; char *synth_event_name; @@ -810,6 +811,29 @@ static const char *synth_field_fmt(char *type) return fmt; } +static void print_synth_event_num_val(struct trace_seq *s, + char *print_fmt, char *name, + int size, u64 val, char *space) +{ + switch (size) { + case 1: + trace_seq_printf(s, print_fmt, name, (u8)val, space); + break; + + case 2: + trace_seq_printf(s, print_fmt, name, (u16)val, space); + break; + + case 4: + trace_seq_printf(s, print_fmt, name, (u32)val, space); + break; + + default: + trace_seq_printf(s, print_fmt, name, val, space); + break; + } +} + static enum print_line_t print_synth_event(struct trace_iterator *iter, int flags, struct trace_event *event) @@ -848,10 +872,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, } else { struct trace_print_flags __flags[] = { __def_gfpflag_names, {-1, NULL} }; + char *space = (i == se->n_fields - 1 ? "" : " "); - trace_seq_printf(s, print_fmt, se->fields[i]->name, - entry->fields[n_u64], - i == se->n_fields - 1 ? "" : " "); + print_synth_event_num_val(s, print_fmt, + se->fields[i]->name, + se->fields[i]->size, + entry->fields[n_u64], + space); if (strcmp(se->fields[i]->type, "gfp_t") == 0) { trace_seq_puts(s, " ("); @@ -875,14 +902,14 @@ static struct trace_event_functions synth_event_funcs = { static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct trace_event_file *trace_file = __data; struct synth_trace_event *entry; struct trace_event_buffer fbuffer; struct ring_buffer *buffer; struct synth_event *event; - unsigned int i, n_u64; + unsigned int i, n_u64, val_idx; int fields_size = 0; event = trace_file->event_call->data; @@ -905,15 +932,16 @@ static notrace void trace_event_raw_event_synth(void *__data, goto out; for (i = 0, n_u64 = 0; i < event->n_fields; i++) { + val_idx = var_ref_idx[i]; if (event->fields[i]->is_string) { - char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; + char *str_val = (char *)(long)var_ref_vals[val_idx]; char *str_field = (char *)&entry->fields[n_u64]; strscpy(str_field, str_val, STR_VAR_LEN_MAX); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } else { struct synth_field *field = event->fields[i]; - u64 val = var_ref_vals[var_ref_idx + i]; + u64 val = var_ref_vals[val_idx]; switch (field->size) { case 1: @@ -1113,10 +1141,10 @@ static struct tracepoint *alloc_synth_tracepoint(char *name) } typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, - unsigned int var_ref_idx); + unsigned int *var_ref_idx); static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, - unsigned int var_ref_idx) + unsigned int *var_ref_idx) { struct tracepoint *tp = event->tp; @@ -2035,12 +2063,6 @@ static int parse_map_size(char *str) unsigned long size, map_bits; int ret; - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } - ret = kstrtoul(str, 0, &size); if (ret) goto out; @@ -2100,25 +2122,25 @@ static int parse_action(char *str, struct hist_trigger_attrs *attrs) static int parse_assignment(struct trace_array *tr, char *str, struct hist_trigger_attrs *attrs) { - int ret = 0; + int len, ret = 0; - if ((str_has_prefix(str, "key=")) || - (str_has_prefix(str, "keys="))) { - attrs->keys_str = kstrdup(str, GFP_KERNEL); + if ((len = str_has_prefix(str, "key=")) || + (len = str_has_prefix(str, "keys="))) { + attrs->keys_str = 
kstrdup(str + len, GFP_KERNEL); if (!attrs->keys_str) { ret = -ENOMEM; goto out; } - } else if ((str_has_prefix(str, "val=")) || - (str_has_prefix(str, "vals=")) || - (str_has_prefix(str, "values="))) { - attrs->vals_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "val=")) || + (len = str_has_prefix(str, "vals=")) || + (len = str_has_prefix(str, "values="))) { + attrs->vals_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->vals_str) { ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "sort=")) { - attrs->sort_key_str = kstrdup(str, GFP_KERNEL); + } else if ((len = str_has_prefix(str, "sort="))) { + attrs->sort_key_str = kstrdup(str + len, GFP_KERNEL); if (!attrs->sort_key_str) { ret = -ENOMEM; goto out; @@ -2129,12 +2151,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "clock=")) { - strsep(&str, "="); - if (!str) { - ret = -EINVAL; - goto out; - } + } else if ((len = str_has_prefix(str, "clock="))) { + str += len; str = strstrip(str); attrs->clock = kstrdup(str, GFP_KERNEL); @@ -2142,8 +2160,8 @@ static int parse_assignment(struct trace_array *tr, ret = -ENOMEM; goto out; } - } else if (str_has_prefix(str, "size=")) { - int map_bits = parse_map_size(str); + } else if ((len = str_has_prefix(str, "size="))) { + int map_bits = parse_map_size(str + len); if (map_bits < 0) { ret = map_bits; @@ -2183,8 +2201,14 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str) while (trigger_str) { char *str = strsep(&trigger_str, ":"); + char *rhs; - if (strchr(str, '=')) { + rhs = strchr(str, '='); + if (rhs) { + if (!strlen(++rhs)) { + ret = -EINVAL; + goto free; + } ret = parse_assignment(tr, str, attrs); if (ret) goto free; @@ -2655,6 +2679,22 @@ static int init_var_ref(struct hist_field *ref_field, goto out; } +static int find_var_ref_idx(struct hist_trigger_data *hist_data, + struct hist_field *var_field) +{ + struct hist_field *ref_field; + int i; + + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) + return i; + } + + return -ENOENT; +} + /** * create_var_ref - Create a variable reference and attach it to trigger * @hist_data: The trigger that will be referencing the variable @@ -4228,11 +4268,11 @@ static int trace_action_create(struct hist_trigger_data *hist_data, struct trace_array *tr = hist_data->event_file->tr; char *event_name, *param, *system = NULL; struct hist_field *hist_field, *var_ref; - unsigned int i, var_ref_idx; + unsigned int i; unsigned int field_pos = 0; struct synth_event *event; char *synth_event_name; - int ret = 0; + int var_ref_idx, ret = 0; lockdep_assert_held(&event_mutex); @@ -4249,8 +4289,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, event->ref++; - var_ref_idx = hist_data->n_var_refs; - for (i = 0; i < data->n_params; i++) { char *p; @@ -4299,6 +4337,14 @@ static int trace_action_create(struct hist_trigger_data *hist_data, goto err; } + var_ref_idx = find_var_ref_idx(hist_data, var_ref); + if (WARN_ON(var_ref_idx < 0)) { + ret = var_ref_idx; + goto err; + } + + data->var_ref_idx[i] = var_ref_idx; + field_pos++; kfree(p); continue; @@ -4317,7 +4363,6 @@ static int trace_action_create(struct hist_trigger_data *hist_data, } data->synth_event = event; - data->var_ref_idx = var_ref_idx; out: return ret; err: @@ -4536,10 +4581,6 @@ static int create_val_fields(struct hist_trigger_data *hist_data, 
if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = 0, j = 1; i < TRACING_MAP_VALS_MAX && j < TRACING_MAP_VALS_MAX; i++) { field_str = strsep(&fields_str, ","); @@ -4634,10 +4675,6 @@ static int create_key_fields(struct hist_trigger_data *hist_data, if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) - goto out; - for (i = n_vals; i < n_vals + TRACING_MAP_KEYS_MAX; i++) { field_str = strsep(&fields_str, ","); if (!field_str) @@ -4795,12 +4832,6 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) if (!fields_str) goto out; - strsep(&fields_str, "="); - if (!fields_str) { - ret = -EINVAL; - goto out; - } - for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) { struct hist_field *hist_field; char *field_str, *field_name; @@ -4809,9 +4840,11 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) sort_key = &hist_data->sort_keys[i]; field_str = strsep(&fields_str, ","); - if (!field_str) { - if (i == 0) - ret = -EINVAL; + if (!field_str) + break; + + if (!*field_str) { + ret = -EINVAL; break; } @@ -4821,7 +4854,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data) } field_name = strsep(&field_str, "."); - if (!field_name) { + if (!field_name || !*field_name) { ret = -EINVAL; break; } diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 40106fff06a4..287d77eae59b 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -116,9 +116,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 9ae87be422f2..ab8b6436d53f 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -876,7 +876,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, for (i = 0; i < tp->nr_args; i++) { parg = tp->args + i; if (parg->count) { - if (strcmp(parg->type->name, "string") == 0) + if ((strcmp(parg->type->name, "string") == 0) || + (strcmp(parg->type->name, "ustring") == 0)) fmt = ", __get_str(%s[%d])"; else fmt = ", REC->%s[%d]"; @@ -884,7 +885,8 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, pos += snprintf(buf + pos, LEN_OR_ZERO, fmt, parg->name, j); } else { - if (strcmp(parg->type->name, "string") == 0) + if ((strcmp(parg->type->name, "string") == 0) || + (strcmp(parg->type->name, "ustring") == 0)) fmt = ", __get_str(%s)"; else fmt = ", REC->%s"; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 69ee8ef12cee..208788fcbb0e 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -1048,10 +1048,15 @@ static int trace_wakeup_test_thread(void *data) { /* Make this a -deadline 
thread */ static const struct sched_attr attr = { +#ifdef CONFIG_SCHED_BMQ + /* No deadline on BMQ, use RR */ + .sched_policy = SCHED_RR, +#else .sched_policy = SCHED_DEADLINE, .sched_runtime = 100000ULL, .sched_deadline = 10000000ULL, .sched_period = 10000000ULL +#endif }; struct wakeup_test_data *x = data; diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 874f1274cf99..d1fa19773cc8 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -280,18 +280,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, @@ -304,7 +308,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -315,17 +319,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -334,15 +336,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8eadadc478f9..c36ecd19562c 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index f41334ef0971..cbd3cf503c90 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -161,6 +161,8 @@ static void lockup_detector_update_enable(void) #ifdef CONFIG_SOFTLOCKUP_DETECTOR +#define SOFTLOCKUP_RESET ULONG_MAX + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -274,7 +276,7 @@ notrace void touch_softlockup_watchdog_sched(void) * Preemption can be enabled. It doesn't matter which CPU's timestamp * gets zeroed here, so use the raw_ operation. */ - raw_cpu_write(watchdog_touch_ts, 0); + raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } notrace void touch_softlockup_watchdog(void) @@ -298,14 +300,14 @@ void touch_all_softlockup_watchdogs(void) * the softlockup check. 
*/ for_each_cpu(cpu, &watchdog_allowed_mask) - per_cpu(watchdog_touch_ts, cpu) = 0; + per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET; wq_watchdog_touch(-1); } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); - __this_cpu_write(watchdog_touch_ts, 0); + __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } static int is_softlockup(unsigned long touch_ts) @@ -383,7 +385,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); - if (touch_ts == 0) { + if (touch_ts == SOFTLOCKUP_RESET) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cfc923558e04..6978892b0d8a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, return; rcu_read_lock(); retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c index 6d83cafebc69..ad0699ce702f 100644 --- a/lib/crypto/chacha20poly1305.c +++ b/lib/crypto/chacha20poly1305.c @@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src, __le64 lens[2]; } b __aligned(16); + if (WARN_ON(src_len > INT_MAX)) + return false; + chacha_load_key(b.k, key); b.iv[0] = 0; diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 61261195f5b6..48054dbf1b51 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -132,14 +132,18 @@ static void fill_pool(void) struct debug_obj *obj; unsigned long flags; - if (likely(obj_pool_free >= debug_objects_pool_min_level)) + if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level)) return; /* * Reuse objs from the global free list; they will be reinitialized * when allocating. + * + * Both obj_nr_tofree and obj_pool_free are checked locklessly; the + * READ_ONCE()s pair with the WRITE_ONCE()s in pool_lock critical + * sections. 
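+ * A stale value read here is harmless: at worst fill_pool() takes or
+ * skips the refill path spuriously, and both conditions are
+ * re-evaluated under pool_lock before any list manipulation.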
*/ - while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { + while (READ_ONCE(obj_nr_tofree) && (READ_ONCE(obj_pool_free) < obj_pool_min_free)) { raw_spin_lock_irqsave(&pool_lock, flags); /* * Recheck with the lock held as the worker thread might have @@ -148,9 +152,9 @@ static void fill_pool(void) while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) { obj = hlist_entry(obj_to_free.first, typeof(*obj), node); hlist_del(&obj->node); - obj_nr_tofree--; + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); } raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -158,7 +162,7 @@ static void fill_pool(void) if (unlikely(!obj_cache)) return; - while (obj_pool_free < debug_objects_pool_min_level) { + while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { struct debug_obj *new[ODEBUG_BATCH_SIZE]; int cnt; @@ -174,7 +178,7 @@ static void fill_pool(void) while (cnt) { hlist_add_head(&new[--cnt]->node, &obj_pool); debug_objects_allocated++; - obj_pool_free++; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); } raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -236,7 +240,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) obj = __alloc_object(&obj_pool); if (obj) { obj_pool_used++; - obj_pool_free--; + WRITE_ONCE(obj_pool_free, obj_pool_free - 1); /* * Looking ahead, allocate one batch of debug objects and @@ -255,7 +259,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) &percpu_pool->free_objs); percpu_pool->obj_free++; obj_pool_used++; - obj_pool_free--; + WRITE_ONCE(obj_pool_free, obj_pool_free - 1); } } @@ -309,8 +313,8 @@ static void free_obj_work(struct work_struct *work) obj = hlist_entry(obj_to_free.first, typeof(*obj), node); hlist_del(&obj->node); hlist_add_head(&obj->node, &obj_pool); - obj_pool_free++; - obj_nr_tofree--; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); } raw_spin_unlock_irqrestore(&pool_lock, flags); return; @@ -324,7 +328,7 @@ static void free_obj_work(struct work_struct *work) if (obj_nr_tofree) { hlist_move_list(&obj_to_free, &tofree); debug_objects_freed += obj_nr_tofree; - obj_nr_tofree = 0; + WRITE_ONCE(obj_nr_tofree, 0); } raw_spin_unlock_irqrestore(&pool_lock, flags); @@ -375,10 +379,10 @@ static void __free_object(struct debug_obj *obj) obj_pool_used--; if (work) { - obj_nr_tofree++; + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); hlist_add_head(&obj->node, &obj_to_free); if (lookahead_count) { - obj_nr_tofree += lookahead_count; + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count); obj_pool_used -= lookahead_count; while (lookahead_count) { hlist_add_head(&objs[--lookahead_count]->node, @@ -396,15 +400,15 @@ static void __free_object(struct debug_obj *obj) for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { obj = __alloc_object(&obj_pool); hlist_add_head(&obj->node, &obj_to_free); - obj_pool_free--; - obj_nr_tofree++; + WRITE_ONCE(obj_pool_free, obj_pool_free - 1); + WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); } } } else { - obj_pool_free++; + WRITE_ONCE(obj_pool_free, obj_pool_free + 1); hlist_add_head(&obj->node, &obj_pool); if (lookahead_count) { - obj_pool_free += lookahead_count; + WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count); obj_pool_used -= lookahead_count; while (lookahead_count) { hlist_add_head(&objs[--lookahead_count]->node, @@ -423,7 +427,7 @@ static void __free_object(struct debug_obj *obj) static 
void free_object(struct debug_obj *obj) { __free_object(obj); - if (!obj_freeing && obj_nr_tofree) { + if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -982,7 +986,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size) debug_objects_maxchecked = objs_checked; /* Schedule work to actually kmem_cache_free() objects */ - if (!obj_freeing && obj_nr_tofree) { + if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -1008,12 +1012,12 @@ static int debug_stats_show(struct seq_file *m, void *v) seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); seq_printf(m, "warnings :%d\n", debug_objects_warnings); seq_printf(m, "fixups :%d\n", debug_objects_fixups); - seq_printf(m, "pool_free :%d\n", obj_pool_free + obj_percpu_free); + seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free); seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free); seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free); seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); - seq_printf(m, "on_free_list :%d\n", obj_nr_tofree); + seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree)); seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); seq_printf(m, "objs_freed :%d\n", debug_objects_freed); return 0; diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c index 55686839eb61..6b9c5242017f 100644 --- a/lib/kunit/try-catch.c +++ b/lib/kunit/try-catch.c @@ -12,7 +12,6 @@ #include <linux/completion.h> #include <linux/kernel.h> #include <linux/kthread.h> -#include <linux/sched/sysctl.h> void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch) { @@ -31,8 +30,6 @@ static int kunit_generic_run_threadfn_adapter(void *data) static unsigned long kunit_test_timeout(void) { - unsigned long timeout_msecs; - /* * TODO(brendanhiggins@google.com): We should probably have some type of * variable timeout here. The only question is what that timeout value * should be. * * For more background on this topic, see: * https://mike-bland.com/2011/11/01/small-medium-large.html + * + * If tests time out because they exceed sysctl_hung_task_timeout_secs, + * the task will be killed and an oops generated. */ - if (sysctl_hung_task_timeout_secs) { - /* - * If sysctl_hung_task is active, just set the timeout to some - * value less than that. - * - * In regards to the above TODO, if we decide on variable - * timeouts, this logic will likely need to change.
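For context, the fixed value returned below is consumed on the catch side of the try/catch; a condensed sketch (not the verbatim KUnit code) of how such a timeout is typically applied to the test thread:

time_remaining = wait_for_completion_timeout(&try_completion,
					     msecs_to_jiffies(kunit_test_timeout()));
if (time_remaining == 0) {
	kunit_err(test, "try timed out\n");
	try_catch->try_result = -ETIMEDOUT;
}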
- */ - timeout_msecs = (sysctl_hung_task_timeout_secs - 1) * - MSEC_PER_SEC; - } else { - timeout_msecs = 300 * MSEC_PER_SEC; /* 5 min */ - } - - return timeout_msecs; + return 300 * MSEC_PER_SEC; /* 5 min */ } void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context) diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 9c485df1308f..f02e10fa6238 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -56,8 +56,8 @@ int main(int argc, char *argv[]) uint8_t v; uint8_t exptbl[256], invtbl[256]; - printf("#include \n"); printf("#include \n"); + printf("#include \n"); /* Compute multiplication table */ printf("\nconst u8 __attribute__((aligned(256)))\n" diff --git a/lib/scatterlist.c b/lib/scatterlist.c index c2cf2c311b7d..5813072bc589 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -311,7 +311,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (prv) table->nents = ++table->orig_nents; - return -ENOMEM; + return -ENOMEM; } sg_init_table(sg, alloc_size); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index ed717dd08ff3..81c69c08d1d1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -83,15 +83,19 @@ static bool init_stack_slab(void **prealloc) return true; if (stack_slabs[depot_index] == NULL) { stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; } else { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } /* * This smp_store_release pairs with smp_load_acquire() from * |next_slab_inited| above and in stack_depot_save(). */ smp_store_release(&next_slab_inited, 1); } - *prealloc = NULL; return true; } diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index e14a15ac250b..71ec3afe1681 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -275,22 +275,23 @@ static void __init test_copy(void) static void __init test_replace(void) { unsigned int nbits = 64; + unsigned int nlongs = DIV_ROUND_UP(nbits, BITS_PER_LONG); DECLARE_BITMAP(bmap, 1024); bitmap_zero(bmap, 1024); - bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits); + bitmap_replace(bmap, &exp2[0 * nlongs], &exp2[1 * nlongs], exp2_to_exp3_mask, nbits); expect_eq_bitmap(bmap, exp3_0_1, nbits); bitmap_zero(bmap, 1024); - bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits); + bitmap_replace(bmap, &exp2[1 * nlongs], &exp2[0 * nlongs], exp2_to_exp3_mask, nbits); expect_eq_bitmap(bmap, exp3_1_0, nbits); bitmap_fill(bmap, 1024); - bitmap_replace(bmap, &exp2[0], &exp2[1], exp2_to_exp3_mask, nbits); + bitmap_replace(bmap, &exp2[0 * nlongs], &exp2[1 * nlongs], exp2_to_exp3_mask, nbits); expect_eq_bitmap(bmap, exp3_0_1, nbits); bitmap_fill(bmap, 1024); - bitmap_replace(bmap, &exp2[1], &exp2[0], exp2_to_exp3_mask, nbits); + bitmap_replace(bmap, &exp2[1 * nlongs], &exp2[0 * nlongs], exp2_to_exp3_mask, nbits); expect_eq_bitmap(bmap, exp3_1_0, nbits); } diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 328d33beae36..3872d250ed2c 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -158,6 +158,7 @@ static noinline void __init kmalloc_oob_krealloc_more(void) if (!ptr1 || !ptr2) { pr_err("Allocation failed\n"); kfree(ptr1); + kfree(ptr2); return; } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index c360f6a6c844..62f05f605fb5 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { 
EXPORT_SYMBOL_GPL(noop_backing_dev_info); static struct class *bdi_class; +const char *bdi_unknown_name = "(unknown)"; /* * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU diff --git a/mm/debug.c b/mm/debug.c index 0461df1207cb..6a52316af839 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -47,6 +47,7 @@ void __dump_page(struct page *page, const char *reason) struct address_space *mapping; bool page_poisoned = PagePoisoned(page); int mapcount; + char *type = ""; /* * If struct page is poisoned don't access Page*() functions as that @@ -78,9 +79,9 @@ void __dump_page(struct page *page, const char *reason) page, page_ref_count(page), mapcount, page->mapping, page_to_pgoff(page)); if (PageKsm(page)) - pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags); + type = "ksm "; else if (PageAnon(page)) - pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags); + type = "anon "; else if (mapping) { if (mapping->host && mapping->host->i_dentry.first) { struct dentry *dentry; @@ -88,10 +89,11 @@ void __dump_page(struct page *page, const char *reason) pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry); } else pr_warn("%ps\n", mapping->a_ops); - pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags); } BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1); + pr_warn("%sflags: %#lx(%pGp)\n", type, page->flags, &page->flags); + hex_only: print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32, sizeof(unsigned long), page, diff --git a/mm/gup.c b/mm/gup.c index 7646bf993b25..5244b8090440 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2415,7 +2415,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, unsigned long addr, len, end; int nr = 0, ret = 0; - if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM))) + if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | + FOLL_FORCE))) return -EINVAL; start = untagged_addr(start) & PAGE_MASK; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a88093213674..c9f8163bd5bf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -177,16 +177,13 @@ static ssize_t enabled_store(struct kobject *kobj, { ssize_t ret = count; - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); } else @@ -250,32 +247,27 @@ static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer+madvise", buf, - min(sizeof("defer+madvise")-1, 
count))) { + } else if (sysfs_streq(buf, "defer+madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { + } else if (sysfs_streq(buf, "defer")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); @@ -2712,7 +2704,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) unsigned long flags; pgoff_t end; - VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); @@ -3040,8 +3032,7 @@ void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, return; flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); - pmdval = *pvmw->pmd; - pmdp_invalidate(vma, address, pvmw->pmd); + pmdval = pmdp_invalidate(vma, address, pvmw->pmd); if (pmd_dirty(pmdval)) set_page_dirty(page); entry = make_migration_entry(page, pmd_write(pmdval)); diff --git a/mm/ksm.c b/mm/ksm.c index d17c7d57d0d8..feef0631734e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2425,54 +2425,78 @@ static int ksm_scan_thread(void *nothing) return 0; } -int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags) +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags) { - struct mm_struct *mm = vma->vm_mm; int err; - switch (advice) { - case MADV_MERGEABLE: - /* - * Be somewhat over-protective for now! - */ - if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_HUGETLB | VM_MIXEDMAP)) - return 0; /* just ignore the advice */ + /* + * Be somewhat over-protective for now! 
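Splitting the MADV_MERGEABLE and MADV_UNMERGEABLE legs out of ksm_madvise() leaves it a thin wrapper and lets other in-kernel users opt a VMA into KSM directly; a hypothetical caller, sketched under the mmap_sem rules of this kernel era:

static int ksm_enter_all(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	int err = 0;

	down_write(&mm->mmap_sem);
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		err = ksm_madvise_merge(mm, vma, &vma->vm_flags);
		if (err)
			break;
	}
	up_write(&mm->mmap_sem);
	return err;
}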
+ */ + if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return 0; /* just ignore the advice */ - if (vma_is_dax(vma)) - return 0; + if (vma_is_dax(vma)) + return 0; #ifdef VM_SAO - if (*vm_flags & VM_SAO) - return 0; + if (*vm_flags & VM_SAO) + return 0; #endif #ifdef VM_SPARC_ADI - if (*vm_flags & VM_SPARC_ADI) - return 0; + if (*vm_flags & VM_SPARC_ADI) + return 0; #endif - if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { - err = __ksm_enter(mm); - if (err) - return err; - } + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { + err = __ksm_enter(mm); + if (err) + return err; + } - *vm_flags |= VM_MERGEABLE; - break; + *vm_flags |= VM_MERGEABLE; - case MADV_UNMERGEABLE: - if (!(*vm_flags & VM_MERGEABLE)) - return 0; /* just ignore the advice */ + return 0; +} - if (vma->anon_vma) { - err = unmerge_ksm_pages(vma, start, end); - if (err) - return err; - } +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags) +{ + int err; + + if (!(*vm_flags & VM_MERGEABLE)) + return 0; /* just ignore the advice */ + + if (vma->anon_vma) { + err = unmerge_ksm_pages(vma, start, end); + if (err) + return err; + } + + *vm_flags &= ~VM_MERGEABLE; + + return 0; +} + +int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags) +{ + struct mm_struct *mm = vma->vm_mm; + int err; - *vm_flags &= ~VM_MERGEABLE; + switch (advice) { + case MADV_MERGEABLE: + err = ksm_madvise_merge(mm, vma, vm_flags); + if (err) + return err; + break; + + case MADV_UNMERGEABLE: + err = ksm_madvise_unmerge(vma, start, end, vm_flags); + if (err) + return err; break; } diff --git a/mm/madvise.c b/mm/madvise.c index bcdb6a042787..1f5dd506778b 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, } page = pmd_page(orig_pmd); + + /* Do not interfere with other mappings of this page */ + if (page_mapcount(page) != 1) + goto huge_unlock; + if (next - addr != HPAGE_PMD_SIZE) { int err; - if (page_mapcount(page) != 1) - goto huge_unlock; - get_page(page); spin_unlock(ptl); lock_page(page); @@ -426,6 +428,10 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, continue; } + /* Do not interfere with other mappings of this page */ + if (page_mapcount(page) != 1) + continue; + VM_BUG_ON_PAGE(PageTransCompound(page), page); if (pte_young(ptent)) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6c83cf4ed970..c051ca158b1e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -409,8 +409,10 @@ int memcg_expand_shrinker_maps(int new_id) if (mem_cgroup_is_root(memcg)) continue; ret = memcg_expand_one_shrinker_map(memcg, size, old_size); - if (ret) + if (ret) { + mem_cgroup_iter_break(NULL, memcg); goto unlock; + } } unlock: if (!ret) @@ -2295,28 +2297,41 @@ static void high_work_func(struct work_struct *work) #define MEMCG_DELAY_SCALING_SHIFT 14 /* - * Scheduled by try_charge() to be executed from the userland return path - * and reclaims memory over the high limit. + * Get the number of jiffies that we should penalise a mischievous cgroup which + * is exceeding its memory.high by checking both it and its ancestors. 
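To make the fixed-point arithmetic below concrete: each overage is scaled by 2^MEMCG_DELAY_PRECISION_SHIFT (assumed to be 20 here; the scaling shift defined above is 14), so squaring carries a factor of 2^40 and the two right shifts leave an effective gain of 2^(20 - 14) = 64. A worked example:

	usage = 120 MiB, high = 100 MiB
	overage = ((120 - 100) << 20) / 100 = 0.2 * 2^20
	penalty = (0.2)^2 * 64 * HZ = 2.56 * HZ jiffies

before the MEMCG_MAX_HIGH_DELAY_JIFFIES clamp and the per-task scaling are applied.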
*/ -void mem_cgroup_handle_over_high(void) +static unsigned long calculate_high_delay(struct mem_cgroup *memcg, + unsigned int nr_pages) { - unsigned long usage, high, clamped_high; - unsigned long pflags; - unsigned long penalty_jiffies, overage; - unsigned int nr_pages = current->memcg_nr_pages_over_high; - struct mem_cgroup *memcg; + unsigned long penalty_jiffies; + u64 max_overage = 0; - if (likely(!nr_pages)) - return; + do { + unsigned long usage, high; + u64 overage; - memcg = get_mem_cgroup_from_mm(current->mm); - reclaim_high(memcg, nr_pages, GFP_KERNEL); - current->memcg_nr_pages_over_high = 0; + usage = page_counter_read(&memcg->memory); + high = READ_ONCE(memcg->high); + + /* + * Prevent division by 0 in overage calculation by acting as if + * it was a threshold of 1 page + */ + high = max(high, 1UL); + + overage = usage - high; + overage <<= MEMCG_DELAY_PRECISION_SHIFT; + overage = div64_u64(overage, high); + + if (overage > max_overage) + max_overage = overage; + } while ((memcg = parent_mem_cgroup(memcg)) && + !mem_cgroup_is_root(memcg)); + + if (!max_overage) + return 0; /* - * memory.high is breached and reclaim is unable to keep up. Throttle - * allocators proactively to slow down excessive growth. - * * We use overage compared to memory.high to calculate the number of * jiffies to sleep (penalty_jiffies). Ideally this value should be * fairly lenient on small overages, and increasingly harsh when the @@ -2324,24 +2339,9 @@ void mem_cgroup_handle_over_high(void) * its crazy behaviour, so we exponentially increase the delay based on * overage amount. */ - - usage = page_counter_read(&memcg->memory); - high = READ_ONCE(memcg->high); - - if (usage <= high) - goto out; - - /* - * Prevent division by 0 in overage calculation by acting as if it was a - * threshold of 1 page - */ - clamped_high = max(high, 1UL); - - overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT, - clamped_high); - - penalty_jiffies = ((u64)overage * overage * HZ) - >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT); + penalty_jiffies = max_overage * max_overage * HZ; + penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT; + penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT; /* * Factor in the task's own contribution to the overage, such that four @@ -2358,7 +2358,32 @@ void mem_cgroup_handle_over_high(void) * application moving forwards and also permit diagnostics, albeit * extremely slowly. */ - penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES); + return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES); +} + +/* + * Scheduled by try_charge() to be executed from the userland return path + * and reclaims memory over the high limit. + */ +void mem_cgroup_handle_over_high(void) +{ + unsigned long penalty_jiffies; + unsigned long pflags; + unsigned int nr_pages = current->memcg_nr_pages_over_high; + struct mem_cgroup *memcg; + + if (likely(!nr_pages)) + return; + + memcg = get_mem_cgroup_from_mm(current->mm); + reclaim_high(memcg, nr_pages, GFP_KERNEL); + current->memcg_nr_pages_over_high = 0; + + /* + * memory.high is breached and reclaim is unable to keep up. Throttle + * allocators proactively to slow down excessive growth. 
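The delay computed below is then paid back before returning to userspace as a killable sleep accounted as a memory stall; roughly the following, assumed from the remainder of the function, which this hunk does not show:

	psi_memstall_enter(&pflags);
	schedule_timeout_killable(penalty_jiffies);
	psi_memstall_leave(&pflags);
	css_put(&memcg->css);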
+ */ + penalty_jiffies = calculate_high_delay(memcg, nr_pages); /* * Don't sleep if the amount of jiffies this memcg owes us is so low @@ -4025,7 +4050,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; unsigned long usage; - int i, j, size; + int i, j, size, entries; mutex_lock(&memcg->thresholds_lock); @@ -4045,14 +4070,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, __mem_cgroup_threshold(memcg, type == _MEMSWAP); /* Calculate new number of threshold */ - size = 0; + size = entries = 0; for (i = 0; i < thresholds->primary->size; i++) { if (thresholds->primary->entries[i].eventfd != eventfd) size++; + else + entries++; } new = thresholds->spare; + /* If no items related to eventfd have been cleared, nothing to do */ + if (!entries) + goto unlock; + /* Set thresholds array to NULL if we don't have thresholds */ if (!size) { kfree(new); @@ -5340,14 +5371,6 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (compound && !list_empty(page_deferred_list(page))) { - spin_lock(&from->deferred_split_queue.split_queue_lock); - list_del_init(page_deferred_list(page)); - from->deferred_split_queue.split_queue_len--; - spin_unlock(&from->deferred_split_queue.split_queue_lock); - } -#endif /* * It is safe to change page->mem_cgroup here because the page * is referenced, charged, and isolated - we can't race with @@ -5357,16 +5380,6 @@ static int mem_cgroup_move_account(struct page *page, /* caller should have done css_get */ page->mem_cgroup = to; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (compound && list_empty(page_deferred_list(page))) { - spin_lock(&to->deferred_split_queue.split_queue_lock); - list_add_tail(page_deferred_list(page), - &to->deferred_split_queue.split_queue); - to->deferred_split_queue.split_queue_len++; - spin_unlock(&to->deferred_split_queue.split_queue_lock); - } -#endif - spin_unlock_irqrestore(&from->move_lock, flags); ret = 0; @@ -6699,19 +6712,9 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) return; - } rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/mm/memory.c b/mm/memory.c index 45442d9a4f52..0eba7af05777 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2221,7 +2221,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, bool ret; void *kaddr; void __user *uaddr; - bool force_mkyoung; + bool locked = false; struct vm_area_struct *vma = vmf->vma; struct mm_struct *mm = vma->vm_mm; unsigned long addr = vmf->address; @@ -2246,11 +2246,11 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. 
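The hook gating all of this is a per-architecture predicate; a simplified sketch of the arm64-style definition (assumed, condensed) makes the comment concrete, since a CPU without hardware Access-flag management will fault a second time on an old PTE:

static inline bool arch_faults_on_old_pte(void)
{
	/* CPUs with hardware AF/DBM maintain the Access flag themselves. */
	return !cpu_has_hw_af();
}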
*/ - force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte); - if (force_mkyoung) { + if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { /* * Other thread has already handled the fault @@ -2274,18 +2274,37 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * zeroes. */ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + if (locked) + goto warn; + + /* Re-validate under PTL if the page is still mapped */ + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + locked = true; + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* The PTE changed under us. Retry page fault. */ + ret = false; + goto pte_unlock; + } + /* - * Give a warn in case there can be some obscure - * use-case + * The same page may have been mapped back since the last copy + * attempt. Try to copy again under PTL. */ - WARN_ON_ONCE(1); - clear_page(kaddr); + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { + /* + * Give a warn in case there can be some obscure + * use-case + */ +warn: + WARN_ON_ONCE(1); + clear_page(kaddr); + } } ret = true; pte_unlock: - if (force_mkyoung) + if (locked) pte_unmap_unlock(vmf->pte, vmf->ptl); kunmap_atomic(kaddr); flush_dcache_page(dst); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a91a072f2b2c..673900faea76 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -599,7 +599,13 @@ EXPORT_SYMBOL_GPL(restore_online_page_callback); void generic_online_page(struct page *page, unsigned int order) { - kernel_map_pages(page, 1 << order, 1); + /* + * Freeing the page with debug_pagealloc enabled will try to unmap it, + * so we should map it first. This is better than introducing a special + * case in the page freeing fast path. + */ + if (debug_pagealloc_enabled_static()) + kernel_map_pages(page, 1 << order, 1); __free_pages_core(page, order); totalram_pages_add(1UL << order); #ifdef CONFIG_HIGHMEM @@ -1764,8 +1770,6 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) BUG_ON(check_hotplug_memory_range(start, size)); - mem_hotplug_begin(); - /* * All memory blocks must be offlined before removing memory. Check * whether all memory blocks in question are offline and return error @@ -1778,9 +1782,14 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); - /* remove memory block devices before removing memory */ + /* + * Memory block device removal under the device_hotplug_lock is + * a barrier against racing online attempts.
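A sketch of the intended nesting, as seen from remove_memory()'s entry points (caller shape assumed; after this change only the arch teardown further below still runs inside the memory hotplug lock):

	lock_device_hotplug();		/* fences userspace online attempts */
	rc = try_remove_memory(nid, start, size);
	unlock_device_hotplug();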
+ */ remove_memory_block_devices(start, size); + mem_hotplug_begin(); + arch_remove_memory(nid, start, size, NULL); memblock_free(start, size); memblock_remove(start, size); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b2920ae87a61..977c641f78cf 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2821,6 +2821,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) char *flags = strchr(str, '='); int err = 1, mode; + if (flags) + *flags++ = '\0'; /* terminate mode string */ + if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; @@ -2831,9 +2834,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) } else nodes_clear(nodes); - if (flags) - *flags++ = '\0'; /* terminate mode string */ - mode = match_string(policy_modes, MPOL_MAX, str); if (mode < 0) goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 86873b6f38a7..b3b5d3bf0aab 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1627,8 +1627,19 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, start = i; } else if (node != current_node) { err = do_move_pages_to_node(mm, &pagelist, current_node); - if (err) + if (err) { + /* + * A positive err means the number of pages that failed + * to migrate. Since we are going to abort and return + * the number of non-migrated pages, we need to include + * the rest of nr_pages that have not been attempted as + * well. + */ + if (err > 0) + err += nr_pages - i - 1; goto out; + } err = store_status(status, start, current_node, i - start); if (err) goto out; @@ -1659,8 +1670,11 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, goto out_flush; err = do_move_pages_to_node(mm, &pagelist, current_node); - if (err) + if (err) { + if (err > 0) + err += nr_pages - i - 1; goto out; + } if (i > start) { err = store_status(status, start, current_node, i - start); if (err) @@ -1674,9 +1688,16 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, /* Make sure we do not overwrite the existing error */ err1 = do_move_pages_to_node(mm, &pagelist, current_node); + /* + * We don't have to report non-attempted pages here since: + * - If the above loop is done gracefully, all pages have been + * attempted. + * - If the above loop is aborted, it means a fatal error + * happened and err should be returned. + */ if (!err1) err1 = store_status(status, start, current_node, i - start); - if (!err) + if (err >= 0) err = err1; out: return err; diff --git a/mm/mmap.c b/mm/mmap.c index 71e4ffc83bcd..cb2c79a3e914 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -195,8 +195,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool downgraded = false; LIST_HEAD(uf); - brk = untagged_addr(brk); - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -1561,8 +1559,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, struct file *file = NULL; unsigned long retval; - addr = untagged_addr(addr); - if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 7d70e5c78f97..7c1b8f67af7b 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -102,14 +102,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ */ static inline void tlb_table_invalidate(struct mmu_gather *tlb) { -#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight.
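The replacement helper is expected to come from the tlb headers; judging by the companion asm-generic and powerpc changes (assumed here, not shown in this hunk), it is a compile-time constant for most architectures and a runtime check where the MMU model varies:

	/* asm-generic fallback */
	#ifndef tlb_needs_table_invalidate
	#define tlb_needs_table_invalidate() (true)
	#endif

	/* powerpc override: only the radix MMU can skip the flush */
	#define tlb_needs_table_invalidate() radix_enabled()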
- */ - tlb_flush_mmu_tlbonly(tlb); -#endif + if (tlb_needs_table_invalidate()) { + /* + * Invalidate page-table caches used by hardware walkers. Then + * we still need to RCU-sched wait while freeing the pages + * because software walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); + } } static void tlb_remove_table_smp_sync(void *arg) diff --git a/mm/mprotect.c b/mm/mprotect.c index 7a8e84f86831..311c0dadf71c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -161,6 +161,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. + */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -178,8 +203,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible for a parallel update to occur + * between the pmd_trans_huge() and pmd_none_or_clear_bad() + * checks, leading to a false positive and an erroneous clear. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ diff --git a/mm/mremap.c b/mm/mremap.c index 122938dcec15..af363063ea23 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -607,7 +607,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, LIST_HEAD(uf_unmap); addr = untagged_addr(addr); - new_addr = untagged_addr(new_addr); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; diff --git a/mm/nommu.c b/mm/nommu.c index bd2b4e5ef144..318df4e236c9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -370,10 +370,14 @@ void vm_unmap_aliases(void) EXPORT_SYMBOL_GPL(vm_unmap_aliases); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement a stub for vmalloc_sync_[un]mappings() if the architecture + * chose not to have one. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) +{ +} + +void __weak vmalloc_sync_unmappings(void) { } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d047bf7d8fd4..627f1eba6df7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6921,7 +6921,8 @@ static u64 zero_pfn_range(unsigned long spfn, unsigned long epfn) * This function also addresses a similar issue where struct pages are left * uninitialized because the physical address range is not covered by * memblock.memory or memblock.reserved. That could happen when memblock - layout is manually configured via memmap=.
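As a concrete example, using x86-64 figures (assumed: 4 KiB pages and 128 MiB sections, so PAGES_PER_SECTION = 32768): if usable memory ends 100 MiB into its last section, then

	used     = 100 MiB / 4 KiB  = 25600 pages
	leftover = 32768 - 25600    = 7168 struct pages

and those 7168 trailing struct pages would stay poisoned unless max_pfn is rounded up to the section boundary as done below.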
+ * layout is manually configured via memmap=, or when the highest physical + * address (max_pfn) does not end on a section boundary. */ void __init zero_resv_unavail(void) { @@ -6939,7 +6940,16 @@ void __init zero_resv_unavail(void) pgcnt += zero_pfn_range(PFN_DOWN(next), PFN_UP(start)); next = end; } - pgcnt += zero_pfn_range(PFN_DOWN(next), max_pfn); + + /* + * Early sections always have a fully populated memmap for the whole + * section - see pfn_valid(). If the last section has holes at the + * end and that section is marked "online", the memmap will be + * considered initialized. Make sure that memmap has a well defined + * state. + */ + pgcnt += zero_pfn_range(PFN_DOWN(next), + round_up(max_pfn, PAGES_PER_SECTION)); /* * Struct pages that do not have backing memory. This could be because diff --git a/mm/slub.c b/mm/slub.c index 8eafccf75940..95534ccefdc2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1954,8 +1954,6 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - else if (!node_present_pages(node)) - searchnode = node_to_mem_node(node); object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) @@ -2544,17 +2542,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, struct page *page; page = c->page; - if (!page) + if (!page) { + /* + * if the node is not online or has no normal memory, just + * ignore the node constraint + */ + if (unlikely(node != NUMA_NO_NODE && + !node_state(node, N_NORMAL_MEMORY))) + node = NUMA_NO_NODE; goto new_slab; + } redo: if (unlikely(!node_match(page, node))) { - int searchnode = node; - - if (node != NUMA_NO_NODE && !node_present_pages(node)) - searchnode = node_to_mem_node(node); - - if (unlikely(!node_match(page, searchnode))) { + /* + * same as above but node_match() being false already + * implies node != NUMA_NO_NODE + */ + if (!node_state(node, N_NORMAL_MEMORY)) { + node = NUMA_NO_NODE; + goto redo; + } else { stat(s, ALLOC_NODE_MISMATCH); deactivate_slab(s, page, c->freelist, c); goto new_slab; @@ -2978,11 +2986,13 @@ static __always_inline void do_slab_free(struct kmem_cache *s, barrier(); if (likely(page == c->page)) { - set_freepointer(s, tail_obj, c->freelist); + void **freelist = READ_ONCE(c->freelist); + + set_freepointer(s, tail_obj, freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, - c->freelist, tid, + freelist, tid, head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); @@ -3155,6 +3165,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { + /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist diff --git a/mm/sparse.c b/mm/sparse.c index 3822ecbd8a1f..3c24708a4dc6 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -744,6 +744,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, struct mem_section *ms = __pfn_to_section(pfn); bool section_is_early = early_section(ms); struct page *memmap = NULL; + bool empty; unsigned long *subsection_map = ms->usage ? 
&ms->usage->subsection_map[0] : NULL; @@ -774,7 +775,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified */ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION); - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { + empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION); + if (empty) { unsigned long section_nr = pfn_to_section_nr(pfn); /* @@ -789,13 +791,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, ms->usage = NULL; } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr); } if (section_is_early && memmap) free_map_bootmem(memmap); else depopulate_section_memmap(pfn, nr_pages, altmap); + + if (empty) + ms->section_mem_map = (unsigned long)NULL; } static struct page * __meminit section_activate(int nid, unsigned long pfn, @@ -886,7 +890,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn, * Poison uninitialized struct pages in order to catch invalid flags * combinations. */ - page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages); + page_init_poison(memmap, sizeof(struct page) * nr_pages); ms = __nr_to_section(section_nr); set_section_nid(section_nr, nid); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index b29ad17edcf5..7b52fff1c43a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1287,7 +1287,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) * First make sure the mappings are removed from all page-tables * before they are freed. */ - vmalloc_sync_all(); + vmalloc_sync_unmappings(); /* * TODO: to calculate a flush range without looping. @@ -3120,16 +3120,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, EXPORT_SYMBOL(remap_vmalloc_range); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose + * not to have one. * * The purpose of this function is to make sure the vmalloc area * mappings are identical in all page-tables in the system. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) { } +void __weak vmalloc_sync_unmappings(void) +{ +} static int f(pte_t *pte, unsigned long addr, void *data) { diff --git a/mm/vmscan.c b/mm/vmscan.c index 572fb17c6273..af4b2b3d4e0d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2429,10 +2429,13 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. - * Make sure we don't miss the last page - * because of a round-off error. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. */ - scan = DIV64_U64_ROUND_UP(scan * fraction[file], + scan = mem_cgroup_online(memcg) ? 
+ div64_u64(scan * fraction[file], denominator) : + DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 5b0b20e6da95..d88a4de02237 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -789,6 +789,10 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); + /* interface already disabled by batadv_iv_ogm_iface_disable */ + if (!*ogm_buff) + return; + /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in * hardif_activate_interface() where the originator mac is set and diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 5d0ed28c0d3a..c86598ff4283 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -831,6 +831,8 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; + lock_sock(sk); + switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); @@ -878,6 +880,7 @@ static int hci_sock_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + release_sock(sk); sock_put(sk); return 0; } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 458be6b3eda9..3ab23f698221 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -643,9 +643,10 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); + nbuckets = roundup_pow_of_two(num_possible_cpus()); /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ - smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus()))); - nbuckets = 1U << smap->bucket_log; + nbuckets = max_t(u32, 2, nbuckets); + smap->bucket_log = ilog2(nbuckets); cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); ret = bpf_map_charge_init(&smap->map.memory, cost); diff --git a/net/core/dev.c b/net/core/dev.c index 81befd0c2510..c3da35f3c7e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -146,7 +146,6 @@ #include "net-sysfs.h" #define MAX_GRO_SKBS 8 -#define MAX_NEST_DEV 8 /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) @@ -331,6 +330,12 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name) name_node = netdev_name_node_lookup(net, name); if (!name_node) return -ENOENT; + /* lookup might have found our primary name or a name belonging + * to another device. 
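The observable effect, in iproute2 terms (command syntax assumed from a recent ip(8)):

	ip link property add dev eth0 altname lan-uplink	# ok
	ip link property del dev eth0 altname eth0		# now -EINVAL: matches the primary name
	ip link property del dev eth1 altname lan-uplink	# now -EINVAL: the name belongs to eth0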
+ */ + if (name_node == dev->name_node || name_node->dev != dev) + return -EINVAL; + __netdev_name_node_alt_destroy(name_node); return 0; @@ -3607,26 +3612,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) && - qdisc_run_begin(q)) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, - &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - goto end_run; - } - qdisc_bstats_cpu_update(q, skb); - - rc = NET_XMIT_SUCCESS; - if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) - __qdisc_run(q); - -end_run: - qdisc_run_end(q); - } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; - qdisc_run(q); - } + rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + qdisc_run(q); if (unlikely(to_free)) kfree_skb_list(to_free); @@ -4477,14 +4464,14 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. */ - if (skb_cloned(skb) || skb_is_tc_redirected(skb)) + if (skb_is_tc_redirected(skb)) return XDP_PASS; /* XDP packets must be linear and must have sufficient headroom * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also * native XDP provides, thus we need to do it here as well. */ - if (skb_is_nonlinear(skb) || + if (skb_cloned(skb) || skb_is_nonlinear(skb) || skb_headroom(skb) < XDP_PACKET_HEADROOM) { int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); int troom = skb->tail + skb->data_len - skb->end; @@ -7153,8 +7140,8 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, return 0; } -static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, - struct list_head **iter) +struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, + struct list_head **iter) { struct netdev_adjacent *lower; @@ -7166,6 +7153,7 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, return lower->dev; } +EXPORT_SYMBOL(netdev_next_lower_dev_rcu); static u8 __netdev_upper_depth(struct net_device *dev) { diff --git a/net/core/devlink.c b/net/core/devlink.c index f76219bf0c21..b10c7a83badb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3352,34 +3352,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { + struct nlattr *param_data; int len; - if (param->type != DEVLINK_PARAM_TYPE_BOOL && - !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) + param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; + + if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data) return -EINVAL; switch (param->type) { case DEVLINK_PARAM_TYPE_U8: - value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u8)) + return -EINVAL; + value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: - value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u16)) + return -EINVAL; + value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: - value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]); + if (nla_len(param_data) != sizeof(u32)) + return -EINVAL; + value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: - len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]), - nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); - if (len == 
nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) || + len = strnlen(nla_data(param_data), nla_len(param_data)); + if (len == nla_len(param_data) || len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; - strcpy(value->vstr, - nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])); + strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: - value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ? - true : false; + if (param_data && nla_len(param_data)) + return -EINVAL; + value->vbool = nla_get_flag(param_data); break; } return 0; @@ -3986,6 +3993,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto out_unlock; } + /* return 0 if there is no further data to read */ + if (start_offset >= region->size) { + err = 0; + goto out_unlock; + } + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, DEVLINK_CMD_REGION_READ); @@ -5911,6 +5924,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 536e032d95c8..246a258b1fac 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1004,8 +1004,10 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) { int cpu; - if (!monitor_hw) + if (!monitor_hw) { NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); + return; + } monitor_hw = false; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3e7e15278c46..bd7eba9066f8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -974,7 +974,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? 
rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) diff --git a/net/core/filter.c b/net/core/filter.c index 538f6a735a19..f797b1599c92 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3543,7 +3543,7 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, return err; } default: - break; + return -EBADRQC; } return 0; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2dbbb030fbed..cc32d1dd78c3 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, @@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); + else if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + + if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 0642f91c4038..b4c87fe31be2 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid stalling socket creation in tasks with a large number of threads + * and open sockets, let's release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with the new classid.
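The batching leans on the iterate_fd() contract: a non-zero return from the callback stops the walk and is handed back to the caller, which can feed it in again as the next starting descriptor. In the abstract, with a hypothetical callback (the real one follows below):

static int visit_one(const void *ctx, struct file *file, unsigned int n)
{
	/* ... per-descriptor work ... */
	if (should_yield(ctx))		/* hypothetical batching check */
		return n + 1;		/* stop; resume later at fd n + 1 */
	return 0;			/* keep walking */
}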
+ */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d9001b5c48eb..1737bac74c45 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1241,6 +1241,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + memset(&node_guid, 0, sizeof(node_guid)); + memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = @@ -1289,8 +1291,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - memset(&node_guid, 0, sizeof(node_guid)); - memset(&port_guid, 0, sizeof(port_guid)); if (dev->netdev_ops->ndo_get_vf_guid && !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, &port_guid)) { @@ -3499,27 +3499,25 @@ static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr, if (err) return err; - alt_ifname = nla_data(attr); + alt_ifname = nla_strdup(attr, GFP_KERNEL); + if (!alt_ifname) + return -ENOMEM; + if (cmd == RTM_NEWLINKPROP) { - alt_ifname = kstrdup(alt_ifname, GFP_KERNEL); - if (!alt_ifname) - return -ENOMEM; err = netdev_name_node_alt_create(dev, alt_ifname); - if (err) { - kfree(alt_ifname); - return err; - } + if (!err) + alt_ifname = NULL; } else if (cmd == RTM_DELLINKPROP) { err = netdev_name_node_alt_destroy(dev, alt_ifname); - if (err) - return err; } else { - WARN_ON(1); - return 0; + WARN_ON_ONCE(1); + err = -EINVAL; } - *changed = true; - return 0; + kfree(alt_ifname); + if (!err) + *changed = true; + return err; } static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/sock.c b/net/core/sock.c index 8459ad579f73..a2f8a34c1e69 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1830,7 +1830,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 
0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 8998e356f423..405397801bb0 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -234,7 +234,6 @@ static void sock_map_free(struct bpf_map *map) int i; synchronize_rcu(); - rcu_read_lock(); raw_spin_lock_bh(&stab->lock); for (i = 0; i < stab->map.max_entries; i++) { struct sock **psk = &stab->sks[i]; @@ -243,13 +242,15 @@ static void sock_map_free(struct bpf_map *map) sk = xchg(psk, NULL); if (sk) { lock_sock(sk); + rcu_read_lock(); sock_map_unref(sk, psk); + rcu_read_unlock(); release_sock(sk); } } raw_spin_unlock_bh(&stab->lock); - rcu_read_unlock(); + /* wait for psock readers accessing its map link */ synchronize_rcu(); bpf_map_area_free(stab->sks); @@ -416,14 +417,16 @@ static int sock_map_update_elem(struct bpf_map *map, void *key, ret = -EINVAL; goto out; } - if (!sock_map_sk_is_suitable(sk) || - sk->sk_state != TCP_ESTABLISHED) { + if (!sock_map_sk_is_suitable(sk)) { ret = -EOPNOTSUPP; goto out; } sock_map_sk_acquire(sk); - ret = sock_map_update_common(map, idx, sk, flags); + if (sk->sk_state != TCP_ESTABLISHED) + ret = -EOPNOTSUPP; + else + ret = sock_map_update_common(map, idx, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); @@ -739,14 +742,16 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key, ret = -EINVAL; goto out; } - if (!sock_map_sk_is_suitable(sk) || - sk->sk_state != TCP_ESTABLISHED) { + if (!sock_map_sk_is_suitable(sk)) { ret = -EOPNOTSUPP; goto out; } sock_map_sk_acquire(sk); - ret = sock_hash_update_common(map, key, sk, flags); + if (sk->sk_state != TCP_ESTABLISHED) + ret = -EOPNOTSUPP; + else + ret = sock_hash_update_common(map, key, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); @@ -859,19 +864,25 @@ static void sock_hash_free(struct bpf_map *map) int i; synchronize_rcu(); - rcu_read_lock(); for (i = 0; i < htab->buckets_num; i++) { bucket = sock_hash_select_bucket(htab, i); raw_spin_lock_bh(&bucket->lock); hlist_for_each_entry_safe(elem, node, &bucket->head, node) { hlist_del_rcu(&elem->node); lock_sock(elem->sk); + rcu_read_lock(); sock_map_unref(elem->sk, elem); + rcu_read_unlock(); release_sock(elem->sk); } raw_spin_unlock_bh(&bucket->lock); } - rcu_read_unlock(); + + /* wait for psock readers accessing its map link */ + synchronize_rcu(); bpf_map_area_free(htab->buckets); kfree(htab); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2dd86d9bcda9..a0bdd6011c9f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -117,7 +117,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); +int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable_rt(struct dsa_port *dp); void dsa_port_disable(struct dsa_port *dp); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); diff --git a/net/dsa/port.c b/net/dsa/port.c index 46ac9ba21987..f190b129260e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -63,7 +63,7 @@ static void dsa_port_set_state_now(struct
dsa_port *dp, u8 state) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } -int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) { struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -78,14 +78,31 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) if (!dp->bridge_dev) dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + if (dp->pl) + phylink_start(dp->pl); + return 0; } -void dsa_port_disable(struct dsa_port *dp) +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +{ + int err; + + rtnl_lock(); + err = dsa_port_enable_rt(dp, phy); + rtnl_unlock(); + + return err; +} + +void dsa_port_disable_rt(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; int port = dp->index; + if (dp->pl) + phylink_stop(dp->pl); + if (!dp->bridge_dev) dsa_port_set_state_now(dp, BR_STATE_DISABLED); @@ -93,6 +110,13 @@ void dsa_port_disable(struct dsa_port *dp) ds->ops->port_disable(ds, port); } +void dsa_port_disable(struct dsa_port *dp) +{ + rtnl_lock(); + dsa_port_disable_rt(dp); + rtnl_unlock(); +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { @@ -619,10 +643,6 @@ static int dsa_port_phylink_register(struct dsa_port *dp) goto err_phy_connect; } - rtnl_lock(); - phylink_start(dp->pl); - rtnl_unlock(); - return 0; err_phy_connect: @@ -633,9 +653,14 @@ static int dsa_port_phylink_register(struct dsa_port *dp) int dsa_port_link_register_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; + struct device_node *phy_np; - if (!ds->ops->adjust_link) - return dsa_port_phylink_register(dp); + if (!ds->ops->adjust_link) { + phy_np = of_parse_phandle(dp->dn, "phy-handle", 0); + if (of_phy_is_fixed_link(dp->dn) || phy_np) + return dsa_port_phylink_register(dp); + return 0; + } dev_warn(ds->dev, "Using legacy PHYLIB callbacks. 
Please migrate to PHYLINK!\n"); @@ -650,11 +675,12 @@ void dsa_port_link_unregister_of(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; - if (!ds->ops->adjust_link) { + if (!ds->ops->adjust_link && dp->pl) { rtnl_lock(); phylink_disconnect_phy(dp->pl); rtnl_unlock(); phylink_destroy(dp->pl); + dp->pl = NULL; return; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 78ffc87dc25e..0b882de5483c 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -90,12 +90,10 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - err = dsa_port_enable(dp, dev->phydev); + err = dsa_port_enable_rt(dp, dev->phydev); if (err) goto clear_promisc; - phylink_start(dp->pl); - return 0; clear_promisc: @@ -119,9 +117,7 @@ static int dsa_slave_close(struct net_device *dev) cancel_work_sync(&dp->xmit_work); skb_queue_purge(&dp->xmit_queue); - phylink_stop(dp->pl); - - dsa_port_disable(dp); + dsa_port_disable_rt(dp); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index c8a128c9e5e0..70db7c909f74 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -33,7 +33,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_port *dp = dsa_slave_to_port(dev); u16 *phdr, hdr; - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; skb_push(skb, QCA_HDR_LEN); diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index ee561297d8a7..fbfd0db182b7 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -27,6 +27,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); + if (!port) + goto finish_pass; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 2c7a38d76a3a..0672b2f01586 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -21,7 +21,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 376882215919..0bd10a1f477f 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1724,6 +1724,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1735,7 +1736,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = 
__ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 5fd6e8ed02b5..66fdbfe5447c 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -56,7 +56,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -110,8 +112,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 18068ed42f25..f369e7ce685b 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -748,6 +748,39 @@ out:; } EXPORT_SYMBOL(__icmp_send); +#if IS_ENABLED(CONFIG_NF_NAT) +#include <net/netfilter/nf_conntrack.h> +void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + __be32 orig_ip; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + icmp_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct iphdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct iphdr)))) + goto out; + + orig_ip = ip_hdr(skb_in)->saddr; + ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + icmp_send(skb_in, type, code, info); + ip_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +EXPORT_SYMBOL(icmp_ndo_send); +#endif static void icmp_socket_deliver(struct sk_buff *skb, u32 info) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 18c0d5bffe12..159513ce8511 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -482,8 +482,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + out: release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * newsk->sk_memcg. + */ + lock_sock(newsk); + + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued.
+ */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } if (req) reqsk_put(req); return newsk; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index f11e997e517b..8c8377568a78 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -100,13 +100,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -147,6 +143,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -284,24 +298,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. 
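A note on the inet_sk_attr_size() change above: the open-coded per-attribute sizes are folded into the inet_diag_msg_attrs_size() helper, which the raw_diag and udp_diag hunks below reuse so their reply buffers are sized consistently. The helper itself lives in the include/linux/inet_diag.h part of this series; the sketch below is reconstructed from the corresponding upstream change and is shown only for context, not as part of this diff:

	static inline size_t inet_diag_msg_attrs_size(void)
	{
		return	  nla_total_size(1)  /* INET_DIAG_SHUTDOWN */
			+ nla_total_size(1)  /* INET_DIAG_TOS */
	#if IS_ENABLED(CONFIG_IPV6)
			+ nla_total_size(1)  /* INET_DIAG_TCLASS */
			+ nla_total_size(1)  /* INET_DIAG_SKV6ONLY */
	#endif
			+ nla_total_size(4)  /* INET_DIAG_MARK */
			+ nla_total_size(4); /* INET_DIAG_CLASS_ID */
	}

nla_total_size() accounts for the attribute header plus alignment padding, so the sum bounds the attribute space that inet_diag_msg_attrs_fill() can emit.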
- */ - if (!classid) - classid = sk->sk_priority; - - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) - goto errout; - } - out: nlmsg_end(skb, nlh); return 0; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 511eaa94e2d1..d072c326dd64 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -321,7 +321,9 @@ static size_t nh_nlmsg_size_single(struct nexthop *nh) static size_t nh_nlmsg_size(struct nexthop *nh) { - size_t sz = nla_total_size(4); /* NHA_ID */ + size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg)); + + sz += nla_total_size(4); /* NHA_ID */ if (nh->is_group) sz += nh_nlmsg_size_grp(nh); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index e35736b99300..a93e7d1e1251 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -100,8 +100,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a7d766e6390e..94e137f7aa9f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2621,10 +2621,12 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered = 0; tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); + tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() @@ -2636,10 +2638,14 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); tp->compressed_ack = 0; + tp->segs_in = 0; + tp->segs_out = 0; tp->bytes_sent = 0; tp->bytes_acked = 0; tp->bytes_received = 0; tp->bytes_retrans = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; tp->duplicate_sack[0].start_seq = 0; tp->duplicate_sack[0].end_seq = 0; tp->dsack_dups = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 030d43c7c957..be5c5903cfe1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1856,8 +1856,12 @@ int __udp_disconnect(struct sock *sk, int flags) inet->inet_dport = 0; sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; - if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) { inet_reset_saddr(sk); + if (sk->sk_prot->rehash && + (sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + sk->sk_prot->rehash(sk); + } if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { sk->sk_prot->unhash(sk); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 910555a4d9fe..dccd2286bc28 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -64,8 +64,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 39d861d00377..46d614b611db 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1226,11 +1226,13 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) } static void -cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) 
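Context for the __udp_disconnect() hunk above: a UDP socket is detached from its peer by calling connect() with AF_UNSPEC, which ends up in __udp_disconnect(). When the socket was bound to a port but not to an address, the earlier connect() locked in a source address that the disconnect now resets to the wildcard, so the socket has to be re-inserted into the address-keyed UDP hash; that is what the added sk_prot->rehash() call does. A minimal userspace sketch of the triggering sequence (illustrative only; addresses and ports are made up):

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	static void udp_disconnect_demo(void)
	{
		int fd = socket(AF_INET, SOCK_DGRAM, 0);
		struct sockaddr_in local = { .sin_family = AF_INET,
					     .sin_port = htons(5000) };	/* port only, address stays INADDR_ANY */
		struct sockaddr_in peer = { .sin_family = AF_INET,
					    .sin_port = htons(7000) };
		struct sockaddr unspec = { .sa_family = AF_UNSPEC };

		peer.sin_addr.s_addr = inet_addr("192.0.2.1");		/* TEST-NET-1 */
		bind(fd, (struct sockaddr *)&local, sizeof(local));
		connect(fd, (struct sockaddr *)&peer, sizeof(peer));	/* source address gets picked here */
		connect(fd, &unspec, sizeof(unspec));			/* __udp_disconnect(): saddr reset, rehash needed */
	}

Without the rehash the socket stays hashed under the now-stale source address and can miss datagrams after the disconnect.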
+cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, + bool del_rt, bool del_peer) { struct fib6_info *f6i; - f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + f6i = addrconf_get_prefix_route(del_peer ? &ifp->peer_addr : &ifp->addr, + ifp->prefix_len, ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) @@ -1293,7 +1295,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (action != CLEANUP_PREFIX_RT_NOP) { cleanup_prefix_route(ifp, expires, - action == CLEANUP_PREFIX_RT_DEL); + action == CLEANUP_PREFIX_RT_DEL, false); } /* clean up prefsrc entries */ @@ -3345,6 +3347,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { /* Alas, we support only Ethernet autoconfiguration. */ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } @@ -4586,12 +4592,14 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, } static int modify_prefix_route(struct inet6_ifaddr *ifp, - unsigned long expires, u32 flags) + unsigned long expires, u32 flags, + bool modify_peer) { struct fib6_info *f6i; u32 prio; - f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + f6i = addrconf_get_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, + ifp->prefix_len, ifp->idev->dev, 0, RTF_DEFAULT, true); if (!f6i) return -ENOENT; @@ -4602,7 +4610,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, ip6_del_rt(dev_net(ifp->idev->dev), f6i); /* add new one */ - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr, + ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } else { @@ -4624,6 +4633,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) unsigned long timeout; bool was_managetempaddr; bool had_prefixroute; + bool new_peer = false; ASSERT_RTNL(); @@ -4655,6 +4665,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) cfg->preferred_lft = timeout; } + if (cfg->peer_pfx && + memcmp(&ifp->peer_addr, cfg->peer_pfx, sizeof(struct in6_addr))) { + if (!ipv6_addr_any(&ifp->peer_addr)) + cleanup_prefix_route(ifp, expires, true, true); + new_peer = true; + } + spin_lock_bh(&ifp->lock); was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; had_prefixroute = ifp->flags & IFA_F_PERMANENT && @@ -4670,6 +4687,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority) ifp->rt_priority = cfg->rt_priority; + if (new_peer) + ifp->peer_addr = *cfg->peer_pfx; + spin_unlock_bh(&ifp->lock); if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); @@ -4678,7 +4698,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) int rc = -ENOENT; if (had_prefixroute) - rc = modify_prefix_route(ifp, expires, flags); + rc = modify_prefix_route(ifp, expires, flags, false); /* prefix route could have been deleted; if so restore it */ if (rc == -ENOENT) { @@ -4686,6 +4706,15 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); } + + if (had_prefixroute && !ipv6_addr_any(&ifp->peer_addr)) + rc = modify_prefix_route(ifp, expires, flags, true); + + if (rc == -ENOENT && !ipv6_addr_any(&ifp->peer_addr)) { + addrconf_prefix_route(&ifp->peer_addr, ifp->prefix_len, + 
ifp->rt_priority, ifp->idev->dev, + expires, flags, GFP_KERNEL); + } } else if (had_prefixroute) { enum cleanup_prefix_rt_t action; unsigned long rt_expires; @@ -4696,7 +4725,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg) if (action != CLEANUP_PREFIX_RT_NOP) { cleanup_prefix_route(ifp, rt_expires, - action == CLEANUP_PREFIX_RT_DEL); + action == CLEANUP_PREFIX_RT_DEL, false); } } @@ -5718,6 +5747,9 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) struct nlattr *tb[IFLA_INET6_MAX + 1]; int err; + if (!idev) + return -EAFNOSUPPORT; + if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) BUG(); @@ -5980,9 +6012,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) - addrconf_prefix_route(&ifp->peer_addr, 128, 0, - ifp->idev->dev, 0, 0, - GFP_ATOMIC); + addrconf_prefix_route(&ifp->peer_addr, 128, + ifp->rt_priority, ifp->idev->dev, + 0, 0, GFP_ATOMIC); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cfae0a1529a1..bde3bf180871 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1068,8 +1068,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -1112,7 +1111,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->fib6_table->tb6_lock)); diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 02045494c24c..e0086758b6ee 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -45,4 +45,38 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) rcu_read_unlock(); } EXPORT_SYMBOL(icmpv6_send); + +#if IS_ENABLED(CONFIG_NF_NAT) +#include <net/netfilter/nf_conntrack.h> +void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + struct sk_buff *cloned_skb = NULL; + enum ip_conntrack_info ctinfo; + struct in6_addr orig_ip; + struct nf_conn *ct; + + ct = nf_ct_get(skb_in, &ctinfo); + if (!ct || !(ct->status & IPS_SRC_NAT)) { + icmpv6_send(skb_in, type, code, info); + return; + } + + if (skb_shared(skb_in)) + skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC); + + if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head || + (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) > + skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in, + skb_network_offset(skb_in) + sizeof(struct ipv6hdr)))) + goto out; + + orig_ip = ipv6_hdr(skb_in)->saddr; + ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + icmpv6_send(skb_in, type, code, info); + ipv6_hdr(skb_in)->saddr = orig_ip; +out: + consume_skb(cloned_skb); +} +EXPORT_SYMBOL(icmpv6_ndo_send); +#endif #endif diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 79fc012dd2ca..debdaeba5d8c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -183,9 +183,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } else if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_prot != &tcpv6_prot) { +
retv = -EBUSY; + break; + } break; - + } else { + break; + } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index affb51c11a25..119c7226c4be 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5152,6 +5152,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f82ea12bac37..425b95eb7e87 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -322,8 +322,13 @@ int l2tp_session_register(struct l2tp_session *session, spin_lock_bh(&pn->l2tp_session_hlist_lock); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. + */ hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) { + if (session_walk->session_id == session->session_id && + (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || + tunnel->encap == L2TP_ENCAPTYPE_IP)) { err = -EEXIST; goto err_tlock_pnlock; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 000c742d0527..6aee699deb28 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3450,7 +3450,7 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, spin_lock_irqsave(&local->ack_status_lock, spin_flags); id = idr_alloc(&local->ack_status_frames, ack_skb, - 1, 0x40, GFP_ATOMIC); + 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, spin_flags); if (id < 0) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5fa13176036f..88d7a692a965 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2019 Intel Corporation + * Copyright (C) 2018 - 2020 Intel Corporation */ #include <linux/delay.h> @@ -1311,7 +1311,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (!res) { ch_switch.timestamp = timestamp; ch_switch.device_timestamp = device_timestamp; - ch_switch.block_tx = beacon ? csa_ie.mode : 0; + ch_switch.block_tx = csa_ie.mode; ch_switch.chandef = csa_ie.chandef; ch_switch.count = csa_ie.count; ch_switch.delay = csa_ie.max_switch_time; @@ -1404,7 +1404,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->vif.csa_active = true; sdata->csa_chandef = csa_ie.chandef; - sdata->csa_block_tx = ch_switch.block_tx; + sdata->csa_block_tx = csa_ie.mode; ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) @@ -1438,7 +1438,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, * reset when the disconnection worker runs.
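On the idr_alloc() range change in cfg.c above (mirrored in mac80211/tx.c further down): idr_alloc(&idr, ptr, start, end, gfp) returns the lowest free ID in the half-open interval [start, end), so the old (1, 0x40) pair capped mac80211 at 63 outstanding ack-status frames, while (1, 0x2000) allows 8191, presumably to match the width of the ack_frame_id cookie. A generic illustration of the API contract (not patch code; my_idr and my_ptr are placeholder names):

	#include <linux/idr.h>

	static DEFINE_IDR(my_idr);

	static int stash_frame(void *my_ptr)
	{
		int id = idr_alloc(&my_idr, my_ptr, 1, 0x2000, GFP_ATOMIC);

		if (id < 0)
			return id;	/* e.g. -ENOSPC once IDs 1..8191 are all live */
		/* ... later, on completion: idr_remove(&my_idr, id); */
		return id;
	}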
*/ sdata->vif.csa_active = true; - sdata->csa_block_tx = ch_switch.block_tx; + sdata->csa_block_tx = csa_ie.mode; ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); mutex_unlock(&local->chanctx_mtx); @@ -2959,7 +2959,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, (auth_transaction == 2 && ifmgd->auth_data->expected_transaction == 2)) { if (!ieee80211_mark_sta_auth(sdata, bssid)) - goto out_err; + return; /* ignore frame -- wait for timeout */ } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 2) { sdata_info(sdata, "SAE peer confirmed\n"); @@ -2967,10 +2967,6 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); - return; - out_err: - mutex_unlock(&sdata->local->sta_mtx); - /* ignore frame -- wait for timeout */ } #define case_WLAN(type) \ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0e05ff037672..0ba98ad9bc85 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4114,7 +4114,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->sta_mtx); - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a8a7306a1f56..b0444e4aba2a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2442,7 +2442,7 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local, spin_lock_irqsave(&local->ack_status_lock, flags); id = idr_alloc(&local->ack_status_frames, ack_skb, - 1, 0x40, GFP_ATOMIC); + 1, 0x2000, GFP_ATOMIC); spin_unlock_irqrestore(&local->ack_status_lock, flags); if (id >= 0) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 32a7a53833c0..decd46b38393 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1063,16 +1063,22 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: - if (elen >= sizeof(struct ieee80211_vht_operation)) + if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: - if (elen > 0) + if (elen > 0) { elems->opmode_notif = pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; @@ -2987,10 +2993,22 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, int cf0, cf1; int ccfs0, ccfs1, ccfs2; int ccf0, ccf1; + u32 vht_cap; + bool support_80_80 = false; + bool support_160 = false; if (!oper || !htop) return false; + vht_cap = hw->wiphy->bands[chandef->chan->band]->vht_cap.cap; + support_160 = (vht_cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK | + IEEE80211_VHT_CAP_EXT_NSS_BW_MASK)); + support_80_80 = ((vht_cap & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) || + (vht_cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && + vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) || + ((vht_cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) >> + IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT > 1)); ccfs0 = oper->center_freq_seg0_idx; ccfs1 = oper->center_freq_seg1_idx; ccfs2 = (le16_to_cpu(htop->operation_mode) & @@ -3018,10 +3036,10 @@ bool 
ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, unsigned int diff; diff = abs(ccf1 - ccf0); - if (diff == 8) { + if ((diff == 8) && support_160) { new.width = NL80211_CHAN_WIDTH_160; new.center_freq1 = cf1; - } else if (diff > 8) { + } else if ((diff > 8) && support_80_80) { new.width = NL80211_CHAN_WIDTH_80P80; new.center_freq2 = cf1; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index cf895bc80871..8dd17589217d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -723,6 +723,20 @@ ip_set_rcu_get(struct net *net, ip_set_id_t index) return set; } +static inline void +ip_set_lock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_lock_bh(&set->lock); +} + +static inline void +ip_set_unlock(struct ip_set *set) +{ + if (!set->variant->region_lock) + spin_unlock_bh(&set->lock); +} + int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) @@ -744,9 +758,9 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb, if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); ret = 1; } else { /* --return-nomatch: invert matched element */ @@ -775,9 +789,9 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -797,9 +811,9 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb, !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); return ret; } @@ -1264,9 +1278,9 @@ ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); - spin_lock_bh(&set->lock); + ip_set_lock(set); set->variant->flush(set); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); } static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb, @@ -1483,31 +1497,34 @@ ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { }; static int -dump_init(struct netlink_callback *cb, struct ip_set_net *inst) +ip_set_dump_start(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; + struct sk_buff *skb = cb->skb; + struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type; - ip_set_id_t index; int ret; ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_dump_policy, NULL); if (ret) - return ret; + goto error; cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if (cda[IPSET_ATTR_SETNAME]) { + ip_set_id_t index; struct ip_set *set; set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); - if (!set) - return -ENOENT; - + if (!set) { + ret = -ENOENT; + goto error; + } dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; } else { @@ -1523,10 +1540,17 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst) cb->args[IPSET_CB_DUMP] 
= dump_type; return 0; + +error: + /* We have to create and send the error message manually :-( */ + if (nlh->nlmsg_flags & NLM_F_ACK) { + netlink_ack(cb->skb, nlh, ret, NULL); + } + return ret; } static int -ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) +ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) { ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; @@ -1537,18 +1561,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) bool is_destroyed; int ret = 0; - if (!cb->args[IPSET_CB_DUMP]) { - ret = dump_init(cb, inst); - if (ret < 0) { - nlh = nlmsg_hdr(cb->skb); - /* We have to create and send the error message - * manually :-( - */ - if (nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(cb->skb, nlh, ret, NULL); - return ret; - } - } + if (!cb->args[IPSET_CB_DUMP]) + return -EINVAL; if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; @@ -1684,7 +1698,8 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb, { struct netlink_dump_control c = { - .dump = ip_set_dump_start, + .start = ip_set_dump_start, + .dump = ip_set_dump_do, .done = ip_set_dump_done, }; return netlink_dump_start(ctnl, skb, nlh, &c); @@ -1712,9 +1727,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { - spin_lock_bh(&set->lock); + ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); - spin_unlock_bh(&set->lock); + ip_set_unlock(set); retried = true; } while (ret == -EAGAIN && set->variant->resize && diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7480ce55b5c8..e52d7b7597a0 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -7,13 +7,21 @@ #include <linux/rcupdate.h> #include <linux/jhash.h> #include <linux/types.h> +#include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/ipset/ip_set_timeout.h> -#define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) -#define ipset_dereference_protected(p, set) \ - __ipset_dereference_protected(p, lockdep_is_held(&(set)->lock)) - -#define rcu_dereference_bh_nfnl(p) rcu_dereference_bh_check(p, 1) +#define __ipset_dereference(p) \ + rcu_dereference_protected(p, 1) +#define ipset_dereference_nfnl(p) \ + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) +#define ipset_dereference_set(p, set) \ + rcu_dereference_protected(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ + lockdep_is_held(&(set)->lock)) +#define ipset_dereference_bh_nfnl(p) \ + rcu_dereference_bh_check(p, \ + lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. @@ -72,11 +80,35 @@ struct hbucket { __aligned(__alignof__(u64)); }; +/* Region size for locking == 2^HTABLE_REGION_BITS */ +#define HTABLE_REGION_BITS 10 +#define ahash_numof_locks(htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? 1 \ + : jhash_size((htable_bits) - HTABLE_REGION_BITS)) +#define ahash_sizeof_regions(htable_bits) \ + (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) +#define ahash_region(n, htable_bits) \ + ((n) % ahash_numof_locks(htable_bits)) +#define ahash_bucket_start(h, htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ? 0 \ + : (h) * jhash_size(HTABLE_REGION_BITS)) +#define ahash_bucket_end(h, htable_bits) \ + ((htable_bits) < HTABLE_REGION_BITS ?
jhash_size(htable_bits) \ + : ((h) + 1) * jhash_size(HTABLE_REGION_BITS)) + +struct htable_gc { + struct delayed_work dwork; + struct ip_set *set; /* Set the gc belongs to */ + u32 region; /* Last gc run position */ +}; + /* The hash table: the table size stored here in order to make resizing easy */ struct htable { atomic_t ref; /* References for resizing */ - atomic_t uref; /* References for dumping */ + atomic_t uref; /* References for dumping and gc */ u8 htable_bits; /* size of hash table == 2^htable_bits */ + u32 maxelem; /* Maxelem per region */ + struct ip_set_region *hregion; /* Region locks and ext sizes */ struct hbucket __rcu *bucket[0]; /* hashtable buckets */ }; @@ -162,6 +194,10 @@ htable_bits(u32 hashsize) #define NLEN 0 #endif /* IP_SET_HASH_WITH_NETS */ +#define SET_ELEM_EXPIRED(set, d) \ + (SET_WITH_TIMEOUT(set) && \ + ip_set_timeout_expired(ext_timeout(d, set))) + #endif /* _IP_SET_HASH_GEN_H */ #ifndef MTYPE @@ -205,10 +241,12 @@ htable_bits(u32 hashsize) #undef mtype_test_cidrs #undef mtype_test #undef mtype_uref -#undef mtype_expire #undef mtype_resize +#undef mtype_ext_size +#undef mtype_resize_ad #undef mtype_head #undef mtype_list +#undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init #undef mtype_variant @@ -247,10 +285,12 @@ htable_bits(u32 hashsize) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) #define mtype_uref IPSET_TOKEN(MTYPE, _uref) -#define mtype_expire IPSET_TOKEN(MTYPE, _expire) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) +#define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size) +#define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_list IPSET_TOKEN(MTYPE, _list) +#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) @@ -275,8 +315,7 @@ htable_bits(u32 hashsize) /* The generic hash structure */ struct htype { struct htable __rcu *table; /* the hash table */ - struct timer_list gc; /* garbage collection when timeout enabled */ - struct ip_set *set; /* attached to this ip_set */ + struct htable_gc gc; /* gc workqueue */ u32 maxelem; /* max elements in the hash */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK @@ -288,21 +327,33 @@ struct htype { #ifdef IP_SET_HASH_WITH_NETMASK u8 netmask; /* netmask value for subnets to store */ #endif + struct list_head ad; /* Resize add|del backlist */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_NETS struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ #endif }; +/* ADD|DEL entries saved during resize */ +struct mtype_resize_ad { + struct list_head list; + enum ipset_adt ad; /* ADD|DEL element */ + struct mtype_elem d; /* Element value */ + struct ip_set_ext ext; /* Extensions for ADD */ + struct ip_set_ext mext; /* Target extensions for ADD */ + u32 flags; /* Flags for ADD */ +}; + #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different * sized networks. cidr == real cidr + 1 to support /0. 
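Worked example for the region-locking macros above, using only the definitions just shown (jhash_size(n) expands to 1 << n): with HTABLE_REGION_BITS = 10 and a table of htable_bits = 14, i.e. 16384 buckets:

	ahash_numof_locks(14);		/* jhash_size(14 - 10) == 16 lock regions */
	ahash_sizeof_regions(14);	/* 16 * sizeof(struct ip_set_region) */
	ahash_bucket_start(3, 14);	/* 3 * jhash_size(10) == 3072 */
	ahash_bucket_end(3, 14);	/* 4 * jhash_size(10) == 4096, exclusive */

A table of 2^10 buckets or fewer collapses to a single region, in which case the one region lock behaves like the old per-set spinlock.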
*/ static void -mtype_add_cidr(struct htype *h, u8 cidr, u8 n) +mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { int i, j; + spin_lock_bh(&set->lock); /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { if (j != -1) { @@ -311,7 +362,7 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n) j = i; } else if (h->nets[i].cidr[n] == cidr) { h->nets[CIDR_POS(cidr)].nets[n]++; - return; + goto unlock; } } if (j != -1) { @@ -320,24 +371,29 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 n) } h->nets[i].cidr[n] = cidr; h->nets[CIDR_POS(cidr)].nets[n] = 1; +unlock: + spin_unlock_bh(&set->lock); } static void -mtype_del_cidr(struct htype *h, u8 cidr, u8 n) +mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { u8 i, j, net_end = NLEN - 1; + spin_lock_bh(&set->lock); for (i = 0; i < NLEN; i++) { if (h->nets[i].cidr[n] != cidr) continue; h->nets[CIDR_POS(cidr)].nets[n]--; if (h->nets[CIDR_POS(cidr)].nets[n] > 0) - return; + goto unlock; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; - return; + goto unlock; } +unlock: + spin_unlock_bh(&set->lock); } #endif @@ -345,7 +401,7 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 n) static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t) { - return sizeof(*h) + sizeof(*t); + return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); } /* Get the ith element from the array block n */ @@ -369,24 +425,29 @@ mtype_flush(struct ip_set *set) struct htype *h = set->data; struct htable *t; struct hbucket *n; - u32 i; - - t = ipset_dereference_protected(h->table, set); - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); - if (!n) - continue; - if (set->extensions & IPSET_EXT_DESTROY) - mtype_ext_cleanup(set, n); - /* FIXME: use slab cache */ - rcu_assign_pointer(hbucket(t, i), NULL); - kfree_rcu(n, rcu); + u32 r, i; + + t = ipset_dereference_nfnl(h->table); + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { + spin_lock_bh(&t->hregion[r].lock); + for (i = ahash_bucket_start(r, t->htable_bits); + i < ahash_bucket_end(r, t->htable_bits); i++) { + n = __ipset_dereference(hbucket(t, i)); + if (!n) + continue; + if (set->extensions & IPSET_EXT_DESTROY) + mtype_ext_cleanup(set, n); + /* FIXME: use slab cache */ + rcu_assign_pointer(hbucket(t, i), NULL); + kfree_rcu(n, rcu); + } + t->hregion[r].ext_size = 0; + t->hregion[r].elements = 0; + spin_unlock_bh(&t->hregion[r].lock); } #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(h->nets)); #endif - set->elements = 0; - set->ext_size = 0; } /* Destroy the hashtable part of the set */ @@ -397,7 +458,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); + n = __ipset_dereference(hbucket(t, i)); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -406,6 +467,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) kfree(n); } + ip_set_free(t->hregion); ip_set_free(t); } @@ -414,28 +476,21 @@ static void mtype_destroy(struct ip_set *set) { struct htype *h = set->data; + struct list_head *l, *lt; if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&h->gc); + cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, - __ipset_dereference_protected(h->table, 1), true); + 
mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + list_for_each_safe(l, lt, &h->ad) { + list_del(l); + kfree(l); + } kfree(h); set->data = NULL; } -static void -mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) -{ - struct htype *h = set->data; - - timer_setup(&h->gc, gc, 0); - mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); - pr_debug("gc initialized, run in every %u\n", - IPSET_GC_PERIOD(set->timeout)); -} - static bool mtype_same_set(const struct ip_set *a, const struct ip_set *b) { @@ -454,11 +509,9 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } -/* Delete expired elements from the hashtable */ static void -mtype_expire(struct ip_set *set, struct htype *h) +mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) { - struct htable *t; struct hbucket *n, *tmp; struct mtype_elem *data; u32 i, j, d; @@ -466,10 +519,12 @@ mtype_expire(struct ip_set *set, struct htype *h) #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif + u8 htable_bits = t->htable_bits; - t = ipset_dereference_protected(h->table, set); - for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(t, i), 1); + spin_lock_bh(&t->hregion[r].lock); + for (i = ahash_bucket_start(r, htable_bits); + i < ahash_bucket_end(r, htable_bits); i++) { + n = __ipset_dereference(hbucket(t, i)); if (!n) continue; for (j = 0, d = 0; j < n->pos; j++) { @@ -485,58 +540,100 @@ mtype_expire(struct ip_set *set, struct htype *h) smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, + mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, k)), k); #endif + t->hregion[r].elements--; ip_set_ext_destroy(set, data); - set->elements--; d++; } if (d >= AHASH_INIT_SIZE) { if (d >= n->size) { + t->hregion[r].ext_size -= + ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, i), NULL); kfree_rcu(n, rcu); continue; } tmp = kzalloc(sizeof(*tmp) + - (n->size - AHASH_INIT_SIZE) * dsize, - GFP_ATOMIC); + (n->size - AHASH_INIT_SIZE) * dsize, + GFP_ATOMIC); if (!tmp) - /* Still try to delete expired elements */ + /* Still try to delete expired elements. 
*/ continue; tmp->size = n->size - AHASH_INIT_SIZE; for (j = 0, d = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); - memcpy(tmp->value + d * dsize, data, dsize); + memcpy(tmp->value + d * dsize, + data, dsize); set_bit(d, tmp->used); d++; } tmp->pos = d; - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); + t->hregion[r].ext_size -= + ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } } + spin_unlock_bh(&t->hregion[r].lock); } static void -mtype_gc(struct timer_list *t) +mtype_gc(struct work_struct *work) { - struct htype *h = from_timer(h, t, gc); - struct ip_set *set = h->set; + struct htable_gc *gc; + struct ip_set *set; + struct htype *h; + struct htable *t; + u32 r, numof_locks; + unsigned int next_run; + + gc = container_of(work, struct htable_gc, dwork.work); + set = gc->set; + h = set->data; - pr_debug("called\n"); spin_lock_bh(&set->lock); - mtype_expire(set, h); + t = ipset_dereference_set(h->table, set); + atomic_inc(&t->uref); + numof_locks = ahash_numof_locks(t->htable_bits); + r = gc->region++; + if (r >= numof_locks) { + r = gc->region = 0; + } + next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; + if (next_run < HZ/10) + next_run = HZ/10; spin_unlock_bh(&set->lock); - h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ; - add_timer(&h->gc); + mtype_gc_do(set, h, t, r); + + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by expire: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + + queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); + +} + +static void +mtype_gc_init(struct htable_gc *gc) +{ + INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); + queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, + struct ip_set_ext *mext, u32 flags); + /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or * fail due to memory pressures. 
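The gc above replaces the old per-set timer with a self-rearming deferrable work item: each run expires a single lock region and requeues itself, with next_run scaled down to the configured GC period divided by the number of regions and clamped to a minimum of HZ/10. Reduced to a generic skeleton (all names here are illustrative, not from the patch):

	struct my_gc {
		struct delayed_work dwork;
		u32 region;
	};

	static void my_gc_worker(struct work_struct *work)
	{
		struct my_gc *gc = container_of(work, struct my_gc, dwork.work);

		expire_one_region(gc->region);			/* hypothetical helper */
		gc->region = (gc->region + 1) % NR_REGIONS;	/* hypothetical bound */
		queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
	}

	/* setup */
	INIT_DEFERRABLE_WORK(&gc->dwork, my_gc_worker);
	queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
	/* teardown, as in mtype_destroy() above */
	cancel_delayed_work_sync(&gc->dwork);

A deferrable work item is backed by a deferrable timer, so an otherwise idle CPU is not woken up just to run garbage collection, and queueing on system_power_efficient_wq lets the scheduler place the work on an already-busy CPU.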
@@ -547,7 +644,7 @@ mtype_resize(struct ip_set *set, bool retried) struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; - size_t extsize, dsize = set->dsize; + size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; @@ -555,7 +652,9 @@ mtype_resize(struct ip_set *set, bool retried) struct mtype_elem *data; struct mtype_elem *d; struct hbucket *n, *m; - u32 i, j, key; + struct list_head *l, *lt; + struct mtype_resize_ad *x; + u32 i, j, r, nr, key; int ret; #ifdef IP_SET_HASH_WITH_NETS @@ -563,10 +662,8 @@ mtype_resize(struct ip_set *set, bool retried) if (!tmp) return -ENOMEM; #endif - rcu_read_lock_bh(); - orig = rcu_dereference_bh_nfnl(h->table); + orig = ipset_dereference_bh_nfnl(h->table); htable_bits = orig->htable_bits; - rcu_read_unlock_bh(); retry: ret = 0; @@ -583,88 +680,124 @@ mtype_resize(struct ip_set *set, bool retried) ret = -ENOMEM; goto out; } + t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); + if (!t->hregion) { + kfree(t); + ret = -ENOMEM; + goto out; + } t->htable_bits = htable_bits; + t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); + for (i = 0; i < ahash_numof_locks(htable_bits); i++) + spin_lock_init(&t->hregion[i].lock); - spin_lock_bh(&set->lock); - orig = __ipset_dereference_protected(h->table, 1); - /* There can't be another parallel resizing, but dumping is possible */ + /* There can't be another parallel resizing, + * but dumping, gc, kernel side add/del are possible + */ + orig = ipset_dereference_bh_nfnl(h->table); atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); - extsize = 0; pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); - for (i = 0; i < jhash_size(orig->htable_bits); i++) { - n = __ipset_dereference_protected(hbucket(orig, i), 1); - if (!n) - continue; - for (j = 0; j < n->pos; j++) { - if (!test_bit(j, n->used)) + for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { + /* Expire may replace a hbucket with another one */ + rcu_read_lock_bh(); + for (i = ahash_bucket_start(r, orig->htable_bits); + i < ahash_bucket_end(r, orig->htable_bits); i++) { + n = __ipset_dereference(hbucket(orig, i)); + if (!n) continue; - data = ahash_data(n, j, dsize); + for (j = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, dsize); + if (SET_ELEM_EXPIRED(set, data)) + continue; #ifdef IP_SET_HASH_WITH_NETS - /* We have readers running parallel with us, - * so the live data cannot be modified. - */ - flags = 0; - memcpy(tmp, data, dsize); - data = tmp; - mtype_data_reset_flags(data, &flags); + /* We have readers running parallel with us, + * so the live data cannot be modified. 
+ */ + flags = 0; + memcpy(tmp, data, dsize); + data = tmp; + mtype_data_reset_flags(data, &flags); #endif - key = HKEY(data, h->initval, htable_bits); - m = __ipset_dereference_protected(hbucket(t, key), 1); - if (!m) { - m = kzalloc(sizeof(*m) + + key = HKEY(data, h->initval, htable_bits); + m = __ipset_dereference(hbucket(t, key)); + nr = ahash_region(key, htable_bits); + if (!m) { + m = kzalloc(sizeof(*m) + AHASH_INIT_SIZE * dsize, GFP_ATOMIC); - if (!m) { - ret = -ENOMEM; - goto cleanup; - } - m->size = AHASH_INIT_SIZE; - extsize += ext_size(AHASH_INIT_SIZE, dsize); - RCU_INIT_POINTER(hbucket(t, key), m); - } else if (m->pos >= m->size) { - struct hbucket *ht; - - if (m->size >= AHASH_MAX(h)) { - ret = -EAGAIN; - } else { - ht = kzalloc(sizeof(*ht) + + if (!m) { + ret = -ENOMEM; + goto cleanup; + } + m->size = AHASH_INIT_SIZE; + t->hregion[nr].ext_size += + ext_size(AHASH_INIT_SIZE, + dsize); + RCU_INIT_POINTER(hbucket(t, key), m); + } else if (m->pos >= m->size) { + struct hbucket *ht; + + if (m->size >= AHASH_MAX(h)) { + ret = -EAGAIN; + } else { + ht = kzalloc(sizeof(*ht) + (m->size + AHASH_INIT_SIZE) * dsize, GFP_ATOMIC); - if (!ht) - ret = -ENOMEM; + if (!ht) + ret = -ENOMEM; + } + if (ret < 0) + goto cleanup; + memcpy(ht, m, sizeof(struct hbucket) + + m->size * dsize); + ht->size = m->size + AHASH_INIT_SIZE; + t->hregion[nr].ext_size += + ext_size(AHASH_INIT_SIZE, + dsize); + kfree(m); + m = ht; + RCU_INIT_POINTER(hbucket(t, key), ht); } - if (ret < 0) - goto cleanup; - memcpy(ht, m, sizeof(struct hbucket) + - m->size * dsize); - ht->size = m->size + AHASH_INIT_SIZE; - extsize += ext_size(AHASH_INIT_SIZE, dsize); - kfree(m); - m = ht; - RCU_INIT_POINTER(hbucket(t, key), ht); - } - d = ahash_data(m, m->pos, dsize); - memcpy(d, data, dsize); - set_bit(m->pos++, m->used); + d = ahash_data(m, m->pos, dsize); + memcpy(d, data, dsize); + set_bit(m->pos++, m->used); + t->hregion[nr].elements++; #ifdef IP_SET_HASH_WITH_NETS - mtype_data_reset_flags(d, &flags); + mtype_data_reset_flags(d, &flags); #endif + } } + rcu_read_unlock_bh(); } - rcu_assign_pointer(h->table, t); - set->ext_size = extsize; - spin_unlock_bh(&set->lock); + /* There can't be any other writer. */ + rcu_assign_pointer(h->table, t); /* Give time to other readers of the set */ synchronize_rcu(); pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); - /* If there's nobody else dumping the table, destroy it */ + /* Add/delete elements processed by the SET target during resize. + * Kernel-side add cannot trigger a resize and userspace actions + * are serialized by the mutex. 
+ */ + list_for_each_safe(l, lt, &h->ad) { + x = list_entry(l, struct mtype_resize_ad, list); + if (x->ad == IPSET_ADD) { + mtype_add(set, &x->d, &x->ext, &x->mext, x->flags); + } else { + mtype_del(set, &x->d, NULL, NULL, 0); + } + list_del(l); + kfree(l); + } + /* If there's nobody else using the table, destroy it */ if (atomic_dec_and_test(&orig->uref)) { pr_debug("Table destroy by resize %p\n", orig); mtype_ahash_destroy(set, orig, false); @@ -677,15 +810,44 @@ mtype_resize(struct ip_set *set, bool retried) return ret; cleanup: + rcu_read_unlock_bh(); atomic_set(&orig->ref, 0); atomic_dec(&orig->uref); - spin_unlock_bh(&set->lock); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; goto out; } +/* Get the current number of elements and ext_size in the set */ +static void +mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) +{ + struct htype *h = set->data; + const struct htable *t; + u32 i, j, r; + struct hbucket *n; + struct mtype_elem *data; + + t = rcu_dereference_bh(h->table); + for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { + for (i = ahash_bucket_start(r, t->htable_bits); + i < ahash_bucket_end(r, t->htable_bits); i++) { + n = rcu_dereference_bh(hbucket(t, i)); + if (!n) + continue; + for (j = 0; j < n->pos; j++) { + if (!test_bit(j, n->used)) + continue; + data = ahash_data(n, j, set->dsize); + if (!SET_ELEM_EXPIRED(set, data)) + (*elements)++; + } + } + *ext_size += t->hregion[r].ext_size; + } +} + /* Add an element to a hash and update the internal counters when succeeded, * otherwise report the proper error code. */ @@ -698,32 +860,49 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n, *old = ERR_PTR(-ENOENT); - int i, j = -1; + int i, j = -1, ret; bool flag_exist = flags & IPSET_FLAG_EXIST; bool deleted = false, forceadd = false, reuse = false; - u32 key, multi = 0; + u32 r, key, multi = 0, elements, maxelem; - if (set->elements >= h->maxelem) { - if (SET_WITH_TIMEOUT(set)) - /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h); - if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set)) + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); + key = HKEY(value, h->initval, t->htable_bits); + r = ahash_region(key, t->htable_bits); + atomic_inc(&t->uref); + elements = t->hregion[r].elements; + maxelem = t->maxelem; + if (elements >= maxelem) { + u32 e; + if (SET_WITH_TIMEOUT(set)) { + rcu_read_unlock_bh(); + mtype_gc_do(set, h, t, r); + rcu_read_lock_bh(); + } + maxelem = h->maxelem; + elements = 0; + for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) + elements += t->hregion[e].elements; + if (elements >= maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } + rcu_read_unlock_bh(); - t = ipset_dereference_protected(h->table, set); - key = HKEY(value, h->initval, t->htable_bits); - n = __ipset_dereference_protected(hbucket(t, key), 1); + spin_lock_bh(&t->hregion[r].lock); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) { - if (forceadd || set->elements >= h->maxelem) + if (forceadd || elements >= maxelem) goto set_full; old = NULL; n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC); - if (!n) - return -ENOMEM; + if (!n) { + ret = -ENOMEM; + goto unlock; + } n->size = AHASH_INIT_SIZE; - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); + t->hregion[r].ext_size += + ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } for (i = 0; i < n->pos; i++) { @@ -737,38 +916,37 @@ 
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, } data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { - if (flag_exist || - (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)))) { + if (flag_exist || SET_ELEM_EXPIRED(set, data)) { /* Just the extensions could be overwritten */ j = i; goto overwrite_extensions; } - return -IPSET_ERR_EXIST; + ret = -IPSET_ERR_EXIST; + goto unlock; } /* Reuse first timed out entry */ - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set)) && - j == -1) { + if (SET_ELEM_EXPIRED(set, data) && j == -1) { j = i; reuse = true; } } if (reuse || forceadd) { + if (j == -1) + j = 0; data = ahash_data(n, j, set->dsize); if (!deleted) { #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_del_cidr(h, + mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), i); #endif ip_set_ext_destroy(set, data); - set->elements--; + t->hregion[r].elements--; } goto copy_data; } - if (set->elements >= h->maxelem) + if (elements >= maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { @@ -776,28 +954,32 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); - return -EAGAIN; + ret = -EAGAIN; + goto resize; } old = n; n = kzalloc(sizeof(*n) + (old->size + AHASH_INIT_SIZE) * set->dsize, GFP_ATOMIC); - if (!n) - return -ENOMEM; + if (!n) { + ret = -ENOMEM; + goto unlock; + } memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; - set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); + t->hregion[r].ext_size += + ext_size(AHASH_INIT_SIZE, set->dsize); } copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: - set->elements++; + t->hregion[r].elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); + mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); #endif memcpy(data, d, sizeof(struct mtype_elem)); overwrite_extensions: @@ -820,13 +1002,41 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (old) kfree_rcu(old, rcu); } + ret = 0; +resize: + spin_unlock_bh(&t->hregion[r].lock); + if (atomic_read(&t->ref) && ext->target) { + /* Resize is in process and kernel side add, save values */ + struct mtype_resize_ad *x; + + x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); + if (!x) + /* Don't bother */ + goto out; + x->ad = IPSET_ADD; + memcpy(&x->d, value, sizeof(struct mtype_elem)); + memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); + memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); + x->flags = flags; + spin_lock_bh(&set->lock); + list_add_tail(&x->list, &h->ad); + spin_unlock_bh(&set->lock); + } + goto out; - return 0; set_full: if (net_ratelimit()) pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; + set->name, maxelem); + ret = -IPSET_ERR_HASH_FULL; +unlock: + spin_unlock_bh(&t->hregion[r].lock); +out: + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by add: %p\n", t); + mtype_ahash_destroy(set, t, false); + } + return ret; } /* Delete an element from the hash and free up space if possible. 
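Note the table lifetime protocol that mtype_add() above and mtype_del()/mtype_uref() below all follow: t->uref counts transient users (add, del, gc and dump) while t->ref is set once a resize has swapped in a replacement table; whichever side drops uref to zero while ref is set frees the old table. Restated as a sketch, reusing the patch's own identifiers:

	atomic_inc(&t->uref);		/* pin the table while working on it */
	/* ... lock the right region, update buckets ... */
	if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
		/* a resize replaced t while we held it; we are the last user */
		mtype_ahash_destroy(set, t, false);
	}

This is why add/del can run concurrently with mtype_resize(): they never dereference h->table again after the initial lookup, and operations that raced with the resize are replayed from the h->ad backlog once the new table is published.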
@@ -840,13 +1050,23 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; - int i, j, k, ret = -IPSET_ERR_EXIST; + struct mtype_resize_ad *x = NULL; + int i, j, k, r, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; size_t dsize = set->dsize; - t = ipset_dereference_protected(h->table, set); + /* Userspace add and resize is excluded by the mutex. + * Kernespace add does not trigger resize. + */ + rcu_read_lock_bh(); + t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); - n = __ipset_dereference_protected(hbucket(t, key), 1); + r = ahash_region(key, t->htable_bits); + atomic_inc(&t->uref); + rcu_read_unlock_bh(); + + spin_lock_bh(&t->hregion[r].lock); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) goto out; for (i = 0, k = 0; i < n->pos; i++) { @@ -857,8 +1077,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, data = ahash_data(n, i, dsize); if (!mtype_data_equal(data, d, &multi)) continue; - if (SET_WITH_TIMEOUT(set) && - ip_set_timeout_expired(ext_timeout(data, set))) + if (SET_ELEM_EXPIRED(set, data)) goto out; ret = 0; @@ -866,20 +1085,33 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; - set->elements--; + t->hregion[r].elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), - j); + mtype_del_cidr(set, h, + NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); #endif ip_set_ext_destroy(set, data); + if (atomic_read(&t->ref) && ext->target) { + /* Resize is in process and kernel side del, + * save values + */ + x = kzalloc(sizeof(struct mtype_resize_ad), + GFP_ATOMIC); + if (x) { + x->ad = IPSET_DEL; + memcpy(&x->d, value, + sizeof(struct mtype_elem)); + x->flags = flags; + } + } for (; i < n->pos; i++) { if (!test_bit(i, n->used)) k++; } if (n->pos == 0 && k == 0) { - set->ext_size -= ext_size(n->size, dsize); + t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { @@ -898,7 +1130,8 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, k++; } tmp->pos = k; - set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize); + t->hregion[r].ext_size -= + ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } @@ -906,6 +1139,16 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, } out: + spin_unlock_bh(&t->hregion[r].lock); + if (x) { + spin_lock_bh(&set->lock); + list_add(&x->list, &h->ad); + spin_unlock_bh(&set->lock); + } + if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { + pr_debug("Table destroy after resize by del: %p\n", t); + mtype_ahash_destroy(set, t, false); + } return ret; } @@ -991,6 +1234,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, int i, ret = 0; u32 key, multi = 0; + rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, @@ -1022,6 +1266,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, goto out; } out: + rcu_read_unlock_bh(); return ret; } @@ -1033,23 +1278,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) const struct htable *t; struct nlattr *nested; size_t memsize; + u32 elements = 0; + size_t ext_size = 0; u8 htable_bits; - /* If any members 
@@ -1033,23 +1278,14 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 	const struct htable *t;
 	struct nlattr *nested;
 	size_t memsize;
+	u32 elements = 0;
+	size_t ext_size = 0;
 	u8 htable_bits;
 
-	/* If any members have expired, set->elements will be wrong
-	 * mytype_expire function will update it with the right count.
-	 * we do not hold set->lock here, so grab it first.
-	 * set->elements can still be incorrect in the case of a huge set,
-	 * because elements might time out during the listing.
-	 */
-	if (SET_WITH_TIMEOUT(set)) {
-		spin_lock_bh(&set->lock);
-		mtype_expire(set, h);
-		spin_unlock_bh(&set->lock);
-	}
-
 	rcu_read_lock_bh();
-	t = rcu_dereference_bh_nfnl(h->table);
-	memsize = mtype_ahash_memsize(h, t) + set->ext_size;
+	t = rcu_dereference_bh(h->table);
+	mtype_ext_size(set, &elements, &ext_size);
+	memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size;
 	htable_bits = t->htable_bits;
 	rcu_read_unlock_bh();
 
@@ -1071,7 +1307,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
#endif
 	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
-	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
+	    nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements)))
 		goto nla_put_failure;
 	if (unlikely(ip_set_put_flags(skb, set)))
 		goto nla_put_failure;
@@ -1091,15 +1327,15 @@ mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start)
 
 	if (start) {
 		rcu_read_lock_bh();
-		t = rcu_dereference_bh_nfnl(h->table);
+		t = ipset_dereference_bh_nfnl(h->table);
 		atomic_inc(&t->uref);
 		cb->args[IPSET_CB_PRIVATE] = (unsigned long)t;
 		rcu_read_unlock_bh();
 	} else if (cb->args[IPSET_CB_PRIVATE]) {
 		t = (struct htable *)cb->args[IPSET_CB_PRIVATE];
 		if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) {
-			/* Resizing didn't destroy the hash table */
-			pr_debug("Table destroy by dump: %p\n", t);
+			pr_debug("Table destroy after resize by dump: %p\n", t);
 			mtype_ahash_destroy(set, t, false);
 		}
 		cb->args[IPSET_CB_PRIVATE] = 0;
@@ -1141,8 +1377,7 @@ mtype_list(const struct ip_set *set,
 			if (!test_bit(i, n->used))
 				continue;
 			e = ahash_data(n, i, set->dsize);
-			if (SET_WITH_TIMEOUT(set) &&
-			    ip_set_timeout_expired(ext_timeout(e, set)))
+			if (SET_ELEM_EXPIRED(set, e))
 				continue;
 			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[IPSET_CB_ARG0], n, i, e);
@@ -1208,6 +1443,7 @@ static const struct ip_set_type_variant mtype_variant = {
 	.uref	= mtype_uref,
 	.resize	= mtype_resize,
 	.same_set = mtype_same_set,
+	.region_lock = true,
 };
 
#ifdef IP_SET_EMIT_CREATE
@@ -1226,6 +1462,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
 	size_t hsize;
 	struct htype *h;
 	struct htable *t;
+	u32 i;
 
 	pr_debug("Create set %s with family %s\n", set->name,
		 set->family == NFPROTO_IPV4 ?
"inet" : "inet6"); @@ -1294,6 +1531,15 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, kfree(h); return -ENOMEM; } + t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); + if (!t->hregion) { + kfree(t); + kfree(h); + return -ENOMEM; + } + h->gc.set = set; + for (i = 0; i < ahash_numof_locks(hbits); i++) + spin_lock_init(&t->hregion[i].lock); h->maxelem = maxelem; #ifdef IP_SET_HASH_WITH_NETMASK h->netmask = netmask; @@ -1304,9 +1550,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, get_random_bytes(&h->initval, sizeof(h->initval)); t->htable_bits = hbits; + t->maxelem = h->maxelem / ahash_numof_locks(hbits); RCU_INIT_POINTER(h->table, t); - h->set = set; + INIT_LIST_HEAD(&h->ad); set->data = h; #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { @@ -1329,12 +1576,10 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) #endif - IPSET_TOKEN(HTYPE, 4_gc_init)(set, - IPSET_TOKEN(HTYPE, 4_gc)); + IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc); #ifndef IP_SET_PROTO_UNDEF else - IPSET_TOKEN(HTYPE, 6_gc_init)(set, - IPSET_TOKEN(HTYPE, 6_gc)); + IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc); #endif } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 410809c669e1..4912069627b6 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -411,7 +411,7 @@ static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu + 1; return per_cpu_ptr(net->ct.stat, cpu); } - + (*pos)++; return NULL; } diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index e33a73cb1f42..86eefb613b08 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -348,9 +348,6 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; - if (flow->flags & FLOW_OFFLOAD_HW) - nf_flow_offload_stats(flow_table, flow); - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))) { if (flow->flags & FLOW_OFFLOAD_HW) { @@ -361,6 +358,8 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) } else { flow_offload_del(flow_table, flow); } + } else if (flow->flags & FLOW_OFFLOAD_HW) { + nf_flow_offload_stats(flow_table, flow); } } @@ -530,9 +529,9 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data) static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable, struct net_device *dev) { - nf_flow_table_offload_flush(flowtable); nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); flush_delayed_work(&flowtable->gc_work); + nf_flow_table_offload_flush(flowtable); } void nf_flow_table_cleanup(struct net_device *dev) @@ -554,6 +553,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); + nf_flow_table_offload_flush(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index d06969af1085..b879e673953f 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ 
b/net/netfilter/nf_flow_table_offload.c @@ -24,6 +24,7 @@ struct flow_offload_work { }; struct nf_flow_key { + struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; struct flow_dissector_key_basic basic; union { @@ -55,6 +56,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, struct nf_flow_key *mask = &match->mask; struct nf_flow_key *key = &match->key; + NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); @@ -62,6 +64,9 @@ static int nf_flow_rule_match(struct nf_flow_match *match, NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp); NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp); + key->meta.ingress_ifindex = tuple->iifidx; + mask->meta.ingress_ifindex = 0xffffffff; + switch (tuple->l3proto) { case AF_INET: key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -105,7 +110,8 @@ static int nf_flow_rule_match(struct nf_flow_match *match, key->tp.dst = tuple->dst_port; mask->tp.dst = 0xffff; - match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) | + match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_PORTS); return 0; @@ -784,8 +790,7 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, __s32 delta; delta = nf_flow_timeout_delta(flow->timeout); - if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10) || - flow->flags & FLOW_OFFLOAD_HW_DYING) + if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10)) return; offload = kzalloc(sizeof(struct flow_offload_work), GFP_ATOMIC); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index b0930d4aba22..b9cbe1e2453e 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -267,7 +267,7 @@ static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu + 1; return per_cpu_ptr(snet->stats, cpu); } - + (*pos)++; return NULL; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e63b481cc86..11a2a7b5312e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1405,6 +1405,11 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, lockdep_commit_lock_is_held(net)); if (nft_dump_stats(skb, stats)) goto nla_put_failure; + + if ((chain->flags & NFT_CHAIN_HW_OFFLOAD) && + nla_put_be32(skb, NFTA_CHAIN_FLAGS, + htonl(NFT_CHAIN_HW_OFFLOAD))) + goto nla_put_failure; } if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) @@ -6172,8 +6177,13 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, goto err4; err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable); - if (err < 0) + if (err < 0) { + list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { + list_del_rcu(&hook->list); + kfree_rcu(hook, rcu); + } goto err4; + } err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) @@ -7250,13 +7260,8 @@ static void nf_tables_module_autoload(struct net *net) list_splice_init(&net->nft.module_list, &module_list); mutex_unlock(&net->nft.commit_mutex); list_for_each_entry_safe(req, next, &module_list, list) { - if (req->done) { - list_del(&req->list); - kfree(req); - } else { - request_module("%s", req->module); - req->done = true; - } + request_module("%s", req->module); + req->done = true; } 
mutex_lock(&net->nft.commit_mutex); list_splice(&module_list, &net->nft.module_list); @@ -8039,6 +8044,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) __nft_release_tables(net); mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); + WARN_ON_ONCE(!list_empty(&net->nft.module_list)); } static struct pernet_operations nf_tables_net_ops = { diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index de3a9596b7f1..a5f294aa8e4c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -742,6 +742,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, + [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, + [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index ff9ac8ae0031..eac4a901233f 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -89,6 +89,7 @@ static const struct nft_chain_type nft_chain_nat_inet = { .name = "nat", .type = NFT_CHAIN_T_NAT, .family = NFPROTO_INET, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 1993af3a2979..a7de3a58f553 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -129,6 +129,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 5284fcf16be7..1effd4878619 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -248,8 +248,9 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr, } static const struct nla_policy nft_tunnel_opts_erspan_policy[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1] = { + [NFTA_TUNNEL_KEY_ERSPAN_VERSION] = { .type = NLA_U32 }, [NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX] = { .type = NLA_U32 }, - [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 }, + [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 }, [NFTA_TUNNEL_KEY_ERSPAN_V2_HWID] = { .type = NLA_U8 }, }; @@ -338,6 +339,8 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = [NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, }, [NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, }, [NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, }, + [NFTA_TUNNEL_KEY_SPORT] = { .type = NLA_U16, }, + [NFTA_TUNNEL_KEY_DPORT] = { .type = NLA_U16, }, [NFTA_TUNNEL_KEY_OPTS] = { .type = NLA_NESTED, }, }; @@ -504,8 +507,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb, static int nft_tunnel_ports_dump(struct sk_buff *skb, struct ip_tunnel_info *info) { - if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, htons(info->key.tp_src)) < 0 || - nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, htons(info->key.tp_dst)) < 0) + if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, info->key.tp_src) < 0 || + nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, info->key.tp_dst) < 0) return -1; return 0; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ce70c2576bb2..44f971f31992 100644 --- 
a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1551,6 +1551,9 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
 	uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file));
 	struct nf_mttg_trav *trav = seq->private;
 
+	if (ppos != NULL)
+		++(*ppos);
+
 	switch (trav->class) {
 	case MTTG_TRAV_INIT:
 		trav->class = MTTG_TRAV_NFP_UNSPEC;
@@ -1576,9 +1579,6 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
 	default:
 		return NULL;
 	}
-
-	if (ppos != NULL)
-		++*ppos;
 	return trav;
 }
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index ced3fc8fad7c..8c835ad63729 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include <linux/refcount.h>
 #include 
 
 #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
@@ -114,7 +115,7 @@ struct dsthash_ent {
 
 struct xt_hashlimit_htable {
 	struct hlist_node node;		/* global list of all htables */
-	int use;
+	refcount_t use;
 	u_int8_t family;
 	bool rnd_initialized;
 
@@ -315,7 +316,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 	for (i = 0; i < hinfo->cfg.size; i++)
 		INIT_HLIST_HEAD(&hinfo->hash[i]);
 
-	hinfo->use = 1;
+	refcount_set(&hinfo->use, 1);
 	hinfo->count = 0;
 	hinfo->family = family;
 	hinfo->rnd_initialized = false;
@@ -357,21 +358,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 	return 0;
 }
 
-static bool select_all(const struct xt_hashlimit_htable *ht,
-		       const struct dsthash_ent *he)
-{
-	return true;
-}
-
-static bool select_gc(const struct xt_hashlimit_htable *ht,
-		      const struct dsthash_ent *he)
-{
-	return time_after_eq(jiffies, he->expires);
-}
-
-static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
-				     bool (*select)(const struct xt_hashlimit_htable *ht,
-						    const struct dsthash_ent *he))
+static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, bool select_all)
 {
 	unsigned int i;
 
@@ -381,7 +368,7 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
 
 		spin_lock_bh(&ht->lock);
 		hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) {
-			if ((*select)(ht, dh))
+			if (time_after_eq(jiffies, dh->expires) || select_all)
 				dsthash_free(ht, dh);
 		}
 		spin_unlock_bh(&ht->lock);
@@ -395,7 +382,7 @@ static void htable_gc(struct work_struct *work)
 
 	ht = container_of(work, struct xt_hashlimit_htable, gc_work.work);
 
-	htable_selective_cleanup(ht, select_gc);
+	htable_selective_cleanup(ht, false);
 
 	queue_delayed_work(system_power_efficient_wq,
			   &ht->gc_work, msecs_to_jiffies(ht->cfg.gc_interval));
@@ -415,15 +402,6 @@ static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo)
 	remove_proc_entry(hinfo->name, parent);
 }
 
-static void htable_destroy(struct xt_hashlimit_htable *hinfo)
-{
-	cancel_delayed_work_sync(&hinfo->gc_work);
-	htable_remove_proc_entry(hinfo);
-	htable_selective_cleanup(hinfo, select_all);
-	kfree(hinfo->name);
-	vfree(hinfo);
-}
-
 static struct xt_hashlimit_htable *htable_find_get(struct net *net,
						   const char *name,
						   u_int8_t family)
@@ -434,7 +412,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
 	hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
 		if (!strcmp(name, hinfo->name) &&
		    hinfo->family == family) {
-			hinfo->use++;
+			refcount_inc(&hinfo->use);
 			return hinfo;
 		}
 	}
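The hashlimit hunks here convert the bare 'use' counter into a refcount_t so that the final put, in the hunk just below, can take the mutex only when the count actually drops to zero. A userspace analogue of refcount_dec_and_mutex_lock(), assuming C11 atomics and pthreads (names invented):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	/* Returns true with the mutex held iff the count dropped to zero,
	 * so the caller can unlink the object before releasing the lock. */
	static bool dec_and_mutex_lock(atomic_int *cnt, pthread_mutex_t *m)
	{
		int old = atomic_load(cnt);

		/* Fast path: not the last reference, drop it locklessly. */
		while (old > 1) {
			if (atomic_compare_exchange_weak(cnt, &old, old - 1))
				return false;
		}
		/* Possibly the last reference: lock first, then re-check, so
		 * a concurrent find_get() under the mutex cannot resurrect a
		 * half-destroyed object. */
		pthread_mutex_lock(m);
		if (atomic_fetch_sub(cnt, 1) != 1) {
			pthread_mutex_unlock(m);
			return false;
		}
		return true;
	}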
@@ -443,12 +421,16 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
 {
-	mutex_lock(&hashlimit_mutex);
-	if (--hinfo->use == 0) {
+	if (refcount_dec_and_mutex_lock(&hinfo->use, &hashlimit_mutex)) {
 		hlist_del(&hinfo->node);
-		htable_destroy(hinfo);
+		htable_remove_proc_entry(hinfo);
+		mutex_unlock(&hashlimit_mutex);
+
+		cancel_delayed_work_sync(&hinfo->gc_work);
+		htable_selective_cleanup(hinfo, true);
+		kfree(hinfo->name);
+		vfree(hinfo);
 	}
-	mutex_unlock(&hashlimit_mutex);
 }
 
 /* The algorithm used is the Simple Token Bucket Filter (TBF)
@@ -851,6 +833,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3);
 }
 
+#define HASHLIMIT_MAX_SIZE 1048576
+
 static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
				     struct xt_hashlimit_htable **hinfo,
				     struct hashlimit_cfg3 *cfg,
@@ -861,6 +845,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
 	if (cfg->gc_interval == 0 || cfg->expire == 0)
 		return -EINVAL;
 
+	if (cfg->size > HASHLIMIT_MAX_SIZE) {
+		cfg->size = HASHLIMIT_MAX_SIZE;
+		pr_info_ratelimited("size too large, truncated to %u\n", cfg->size);
+	}
+	if (cfg->max > HASHLIMIT_MAX_SIZE) {
+		cfg->max = HASHLIMIT_MAX_SIZE;
+		pr_info_ratelimited("max too large, truncated to %u\n", cfg->max);
+	}
 	if (par->family == NFPROTO_IPV4) {
 		if (cfg->srcmask > 32 || cfg->dstmask > 32)
 			return -EINVAL;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 781e0b482189..6c2582a19766 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -492,12 +492,12 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	const struct recent_entry *e = v;
 	const struct list_head *head = e->list.next;
 
+	(*pos)++;
 	while (head == &t->iphash[st->bucket]) {
 		if (++st->bucket >= ip_list_hash_size)
 			return NULL;
 		head = t->iphash[st->bucket].next;
 	}
-	(*pos)++;
 	return list_entry(head, struct recent_entry, list);
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4e31721e7293..5313f1cec170 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1014,7 +1014,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	if (nlk->netlink_bind && groups) {
 		int group;
 
-		for (group = 0; group < nlk->ngroups; group++) {
+		/* nl_groups is a u32, so cap the maximum groups we can bind */
+		for (group = 0; group < BITS_PER_TYPE(u32); group++) {
 			if (!test_bit(group, &groups))
 				continue;
 			err = nlk->netlink_bind(net, group + 1);
@@ -1033,7 +1034,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
			netlink_insert(sk, nladdr->nl_pid) :
			netlink_autobind(sock);
 		if (err) {
-			netlink_undo_bind(nlk->ngroups, groups, sk);
+			netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk);
 			goto unlock;
 		}
 	}
@@ -2433,7 +2434,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
			       in_skb->len))
 			WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS,
					    (u8 *)extack->bad_attr -
-					    in_skb->data));
+					    (u8 *)nlh));
 	} else {
 		if (extack->cookie_len)
 			WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
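The netlink_bind() fix above caps group iteration at 32 because the legacy sockaddr_nl bitmap is a u32 even when the protocol registered more groups; walking nlk->ngroups could read past the bitmap. A sketch of the capped walk (BITS_PER_TYPE redefined locally, callback name invented):

	#include <stdint.h>

	#define BITS_PER_TYPE(t)	(sizeof(t) * 8)

	static void for_each_bound_group(uint32_t groups,
					 void (*cb)(unsigned int group))
	{
		unsigned int group;

		/* Never index beyond the 32 bits the bitmap actually has. */
		for (group = 0; group < BITS_PER_TYPE(uint32_t); group++)
			if (groups & (1u << group))
				cb(group + 1);	/* netlink groups are 1-based */
	}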
pr_debug("from pipe %x cmd %x\n", pipe, cmd); + + if (pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + + gate = hdev->pipes[pipe].gate; switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: @@ -375,8 +382,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = hdev->pipes[pipe].gate; + u8 gate; + + if (pipe >= NFC_HCI_MAX_PIPES) { + pr_err("Discarded event %x to invalid pipe %x\n", event, pipe); + goto exit; + } + gate = hdev->pipes[pipe].gate; if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index eee0dddb7749..e894254c17d4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -32,6 +32,7 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING, .len = NFC_DEVICE_NAME_MAXSIZE }, [NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_TARGET_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_COMM_MODE] = { .type = NLA_U8 }, [NFC_ATTR_RF_MODE] = { .type = NLA_U8 }, [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, @@ -43,7 +44,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, + [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e3a37d22539c..c82b88693fe2 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -647,6 +647,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, + [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, }; static const struct genl_ops dp_packet_genl_ops[] = { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 118cd66b7516..20edb7c25e22 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2273,6 +2273,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2285,12 +2292,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, status |= TP_STATUS_LOSING; } - if (do_vnet && - virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) - goto drop_n_account; - po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9d3c4d2d893a..fe42f986cd94 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -194,6 +194,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); rxrpc_unuse_local(local); + rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk) 
@@ -899,6 +900,7 @@ static int rxrpc_release_sock(struct sock *sk)
 	rxrpc_purge_queue(&sk->sk_receive_queue);
 
 	rxrpc_unuse_local(rx->local);
+	rxrpc_put_local(rx->local);
 	rx->local = NULL;
 	key_put(rx->key);
 	rx->key = NULL;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5e99df80e80a..7d730c438404 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -490,6 +490,7 @@ enum rxrpc_call_flag {
 	RXRPC_CALL_RX_HEARD,		/* The peer responded at least once to this call */
 	RXRPC_CALL_RX_UNDERRUN,		/* Got data underrun */
 	RXRPC_CALL_IS_INTR,		/* The call is interruptible */
+	RXRPC_CALL_DISCONNECTED,	/* The call has been disconnected */
 };
 
 /*
@@ -1021,6 +1022,16 @@ void rxrpc_unuse_local(struct rxrpc_local *);
 void rxrpc_queue_local(struct rxrpc_local *);
 void rxrpc_destroy_all_locals(struct rxrpc_net *);
 
+static inline bool __rxrpc_unuse_local(struct rxrpc_local *local)
+{
+	return atomic_dec_return(&local->active_users) == 0;
+}
+
+static inline bool __rxrpc_use_local(struct rxrpc_local *local)
+{
+	return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0;
+}
+
 /*
  * misc.c
 */
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index a31c18c09894..c9f34b0a11df 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -493,7 +493,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 
 	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
-	if (conn)
+	if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
 		rxrpc_disconnect_call(call);
 	if (call->security)
 		call->security->free_call_crypto(call);
@@ -562,13 +562,14 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 }
 
 /*
- * Final call destruction under RCU.
+ * Final call destruction - but must be done in process context.
 */
-static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+static void rxrpc_destroy_call(struct work_struct *work)
 {
-	struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+	struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
 	struct rxrpc_net *rxnet = call->rxnet;
 
+	rxrpc_put_connection(call->conn);
 	rxrpc_put_peer(call->peer);
 	kfree(call->rxtx_buffer);
 	kfree(call->rxtx_annotations);
@@ -577,6 +578,22 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
 	wake_up_var(&rxnet->nr_calls);
 }
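rxrpc_destroy_call() above may sleep (it drops a connection reference), so the RCU callback just below bounces it to a work item when it fires in softirq context. The shape of that pattern as a hedged sketch; destroy_call() and defer_to_worker() are hypothetical helpers, not real kernel APIs:

	#include <stdbool.h>

	struct call;

	/* Assumed provided elsewhere: destroy_call() frees the call and may
	 * sleep; defer_to_worker() hands it to a thread allowed to sleep. */
	void destroy_call(struct call *call);
	void defer_to_worker(void (*fn)(struct call *), struct call *call);

	/* An RCU callback can fire in softirq context, where sleeping is
	 * forbidden, so the final teardown is deferred to process context. */
	static void rcu_free_call(struct call *call, bool in_softirq_ctx)
	{
		if (in_softirq_ctx)
			defer_to_worker(destroy_call, call);
		else
			destroy_call(call);
	}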
+/*
+ * Final call destruction under RCU.
+ */
+static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
+{
+	struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
+
+	if (in_softirq()) {
+		INIT_WORK(&call->processor, rxrpc_destroy_call);
+		if (!rxrpc_queue_work(&call->processor))
+			BUG();
+	} else {
+		rxrpc_destroy_call(&call->processor);
+	}
+}
+
 /*
  * clean up a call
 */
@@ -590,7 +607,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
 
 	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
-	ASSERTCMP(call->conn, ==, NULL);
 
 	rxrpc_cleanup_ring(call);
 	rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 376370cd9285..ea7d4c21f889 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -785,6 +785,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	u32 cid;
 
 	spin_lock(&conn->channel_lock);
+	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
 
 	cid = call->cid;
 	if (cid) {
@@ -792,7 +793,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 		chan = &conn->channels[channel];
 	}
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
-	call->conn = NULL;
 
 	/* Calls that have never actually been assigned a channel can simply be
	 * discarded.  If the conn didn't get used either, it will follow
@@ -908,7 +908,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 	spin_unlock(&rxnet->client_conn_cache_lock);
 out_2:
 	spin_unlock(&conn->channel_lock);
-	rxrpc_put_connection(conn);
 	_leave("");
 	return;
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 808a4723f868..06fcff2ebbba 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -438,16 +438,12 @@ static void rxrpc_process_delayed_final_acks(struct rxrpc_connection *conn)
 /*
  * connection-level event processor
 */
-void rxrpc_process_connection(struct work_struct *work)
+static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
 {
-	struct rxrpc_connection *conn =
-		container_of(work, struct rxrpc_connection, processor);
 	struct sk_buff *skb;
 	u32 abort_code = RX_PROTOCOL_ERROR;
 	int ret;
 
-	rxrpc_see_connection(conn);
-
 	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
 		rxrpc_secure_connection(conn);
 
@@ -475,18 +471,32 @@ void rxrpc_process_connection(struct work_struct *work)
 		}
 	}
 
-out:
-	rxrpc_put_connection(conn);
-	_leave("");
 	return;
 
 requeue_and_leave:
 	skb_queue_head(&conn->rx_queue, skb);
-	goto out;
+	return;
 
 protocol_error:
 	if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
 		goto requeue_and_leave;
 	rxrpc_free_skb(skb, rxrpc_skb_freed);
-	goto out;
+	return;
+}
+
+void rxrpc_process_connection(struct work_struct *work)
+{
+	struct rxrpc_connection *conn =
+		container_of(work, struct rxrpc_connection, processor);
+
+	rxrpc_see_connection(conn);
+
+	if (__rxrpc_use_local(conn->params.local)) {
+		rxrpc_do_process_connection(conn);
+		rxrpc_unuse_local(conn->params.local);
+	}
+
+	rxrpc_put_connection(conn);
+	_leave("");
+	return;
 }
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 38d718e90dc6..19e141eeed17 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -223,9 +223,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 	__rxrpc_disconnect_call(conn, call);
 	spin_unlock(&conn->channel_lock);
 
-	call->conn = NULL;
+	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
 	conn->idle_timestamp = jiffies;
-	rxrpc_put_connection(conn);
 }
 
 /*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 86bd133b4fa0..ef10fbf71b15 100644
---
a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -413,7 +413,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); enum rxrpc_call_state state; - unsigned int j; + unsigned int j, nr_subpackets; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; rxrpc_seq_t seq0 = sp->hdr.seq, hard_ack; bool immediate_ack = false, jumbo_bad = false; @@ -457,7 +457,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) call->ackr_prev_seq = seq0; hard_ack = READ_ONCE(call->rx_hard_ack); - if (sp->nr_subpackets > 1) { + nr_subpackets = sp->nr_subpackets; + if (nr_subpackets > 1) { if (call->nr_jumbo_bad > 3) { ack = RXRPC_ACK_NOSPACE; ack_serial = serial; @@ -465,11 +466,11 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } } - for (j = 0; j < sp->nr_subpackets; j++) { + for (j = 0; j < nr_subpackets; j++) { rxrpc_serial_t serial = sp->hdr.serial + j; rxrpc_seq_t seq = seq0 + j; unsigned int ix = seq & RXRPC_RXTX_BUFF_MASK; - bool terminal = (j == sp->nr_subpackets - 1); + bool terminal = (j == nr_subpackets - 1); bool last = terminal && (sp->rx_flags & RXRPC_SKB_INCL_LAST); u8 flags, annotation = j; @@ -506,7 +507,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } if (call->rxtx_buffer[ix]) { - rxrpc_input_dup_data(call, seq, sp->nr_subpackets > 1, + rxrpc_input_dup_data(call, seq, nr_subpackets > 1, &jumbo_bad); if (ack != RXRPC_ACK_DUPLICATE) { ack = RXRPC_ACK_DUPLICATE; @@ -564,6 +565,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) * ring. */ skb = NULL; + sp = NULL; } if (last) { @@ -597,10 +599,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) false, true, rxrpc_propose_ack_input_data); - if (seq0 == READ_ONCE(call->rx_hard_ack) + 1) { - trace_rxrpc_notify_socket(call->debug_id, serial); - rxrpc_notify_socket(call); - } + trace_rxrpc_notify_socket(call->debug_id, serial); + rxrpc_notify_socket(call); unlock: spin_unlock(&call->input_lock); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 36587260cabd..a6c1349e965d 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -364,11 +364,14 @@ void rxrpc_queue_local(struct rxrpc_local *local) void rxrpc_put_local(struct rxrpc_local *local) { const void *here = __builtin_return_address(0); + unsigned int debug_id; int n; if (local) { + debug_id = local->debug_id; + n = atomic_dec_return(&local->usage); - trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here); + trace_rxrpc_local(debug_id, rxrpc_local_put, n, here); if (n == 0) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -380,14 +383,11 @@ void rxrpc_put_local(struct rxrpc_local *local) */ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) { - unsigned int au; - local = rxrpc_get_local_maybe(local); if (!local) return NULL; - au = atomic_fetch_add_unless(&local->active_users, 1, 0); - if (au == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_put_local(local); return NULL; } @@ -401,14 +401,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) */ void rxrpc_unuse_local(struct rxrpc_local *local) { - unsigned int au; - if (local) { - au = atomic_dec_return(&local->active_users); - if (au == 0) + if (__rxrpc_unuse_local(local)) { + rxrpc_get_local(local); rxrpc_queue_local(local); - else - rxrpc_put_local(local); + } } } @@ -465,7 +462,7 @@ static void rxrpc_local_processor(struct work_struct *work) do { again = false; - if 
(atomic_read(&local->active_users) == 0) { + if (!__rxrpc_use_local(local)) { rxrpc_local_destroyer(local); break; } @@ -479,6 +476,8 @@ static void rxrpc_local_processor(struct work_struct *work) rxrpc_process_local_events(local); again = true; } + + __rxrpc_unuse_local(local); } while (again); rxrpc_put_local(local); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 935bb60fff56..bad3d2420344 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -129,7 +129,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, rxrpc_serial_t *_serial) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; @@ -139,18 +139,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, int ret; u8 reason; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - rxrpc_put_connection(conn); + if (!pkt) return -ENOMEM; - } + + conn = call->conn; msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -244,7 +240,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, } out: - rxrpc_put_connection(conn); kfree(pkt); return ret; } @@ -254,7 +249,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, */ int rxrpc_send_abort_packet(struct rxrpc_call *call) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_abort_buffer pkt; struct msghdr msg; struct kvec iov[1]; @@ -271,13 +266,11 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) test_bit(RXRPC_CALL_TX_LAST, &call->flags)) return 0; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; + conn = call->conn; + msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -312,8 +305,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr, rxrpc_tx_point_call_abort); rxrpc_tx_backoff(call, ret); - - rxrpc_put_connection(conn); return ret; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 48f67a9b1037..923b263c401b 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -364,27 +364,31 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, if (!rxrpc_get_peer_maybe(peer)) continue; - spin_unlock_bh(&rxnet->peer_hash_lock); - - keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; - slot = keepalive_at - base; - _debug("%02x peer %u t=%d {%pISp}", - cursor, peer->debug_id, slot, &peer->srx.transport); + if (__rxrpc_use_local(peer->local)) { + spin_unlock_bh(&rxnet->peer_hash_lock); + + keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; + slot = keepalive_at - base; + _debug("%02x peer %u t=%d {%pISp}", + cursor, peer->debug_id, slot, &peer->srx.transport); + + if (keepalive_at <= base || + keepalive_at > base + RXRPC_KEEPALIVE_TIME) { + rxrpc_send_keepalive(peer); + slot = RXRPC_KEEPALIVE_TIME; + } - if (keepalive_at <= base || - keepalive_at > base + RXRPC_KEEPALIVE_TIME) { - rxrpc_send_keepalive(peer); - slot = RXRPC_KEEPALIVE_TIME; + /* A transmission to 
this peer occurred since last we + * examined it so put it into the appropriate future + * bucket. + */ + slot += cursor; + slot &= mask; + spin_lock_bh(&rxnet->peer_hash_lock); + list_add_tail(&peer->keepalive_link, + &rxnet->peer_keepalive[slot & mask]); + rxrpc_unuse_local(peer->local); } - - /* A transmission to this peer occurred since last we examined - * it so put it into the appropriate future bucket. - */ - slot += cursor; - slot &= mask; - spin_lock_bh(&rxnet->peer_hash_lock); - list_add_tail(&peer->keepalive_link, - &rxnet->peer_keepalive[slot & mask]); rxrpc_put_peer_locked(peer); } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4aafbe3d435c..f256a7c69093 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -263,12 +263,17 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, } } -static void basic_bind_class(void *fh, u32 classid, unsigned long cl) +static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct basic_filter *f = fh; - if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 8229ed4a67be..6e3e63db0e01 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -631,12 +631,17 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; } -static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) +static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl, + void *q, unsigned long base) { struct cls_bpf_prog *prog = fh; - if (prog && prog->res.classid == classid) - prog->res.class = cl; + if (prog && prog->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &prog->res, base); + else + __tcf_unbind_filter(q, &prog->res); + } } static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index b0f42e62dd76..d32d4233d337 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -305,6 +305,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; list_for_each_entry_rcu(mask, &head->masks, list) { + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, mask); skb_flow_dissect_meta(skb, &mask->dissector, &skb_key); @@ -691,6 +692,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { .len = 128 / BITS_PER_BYTE }, [TCA_FLOWER_KEY_CT_LABELS_MASK] = { .type = NLA_BINARY, .len = 128 / BITS_PER_BYTE }, + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, }; static const struct nla_policy @@ -2765,12 +2767,17 @@ static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv) return -EMSGSIZE; } -static void fl_bind_class(void *fh, u32 classid, unsigned long cl) +static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct cls_fl_filter *f = fh; - if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } } static bool fl_delete_empty(struct tcf_proto *tp) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c9496c920d6f..ec945294626a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c 
@@ -419,12 +419,17 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; } -static void fw_bind_class(void *fh, u32 classid, unsigned long cl) +static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct fw_filter *f = fh; - if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } } static struct tcf_proto_ops cls_fw_ops __read_mostly = { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 7fc2eb62aa98..610a0b728161 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -157,6 +157,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle) static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC }, [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 }, + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, @@ -393,12 +394,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; } -static void mall_bind_class(void *fh, u32 classid, unsigned long cl) +static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct cls_mall_head *head = fh; - if (head && head->res.classid == classid) - head->res.class = cl; + if (head && head->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &head->res, base); + else + __tcf_unbind_filter(q, &head->res); + } } static struct tcf_proto_ops cls_mall_ops __read_mostly = { diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 2d9e0b4484ea..6f8786b06bde 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -641,12 +641,17 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; } -static void route4_bind_class(void *fh, u32 classid, unsigned long cl) +static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct route4_filter *f = fh; - if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } } static struct tcf_proto_ops cls_route4_ops __read_mostly = { diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 2f3c03b25d5d..d36949d9382c 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -463,10 +463,8 @@ static u32 gen_tunnel(struct rsvp_head *data) static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; @@ -738,12 +736,17 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; } -static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) +static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct rsvp_filter *f = fh; - if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } } 
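Each *_bind_class() hunk in this series has the same shape: bind the filter result through the qdisc when cl is non-zero, unbind it otherwise. A generic sketch of that shape (struct fields abbreviated; the callbacks stand in for __tcf_bind_filter(q, res, base) and __tcf_unbind_filter(q, res), which also manage the class reference):

	struct tcf_result {
		unsigned long	class;
		unsigned int	classid;
	};

	static void bind_class(struct tcf_result *res, unsigned int classid,
			       unsigned long cl,
			       void (*bind)(struct tcf_result *),
			       void (*unbind)(struct tcf_result *))
	{
		/* Only the filter bound to this classid is affected. */
		if (!res || res->classid != classid)
			return;
		if (cl)
			bind(res);
		else
			unbind(res);
	}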
static struct tcf_proto_ops RSVP_OPS __read_mostly = { diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e573e5a5c794..09b7dc5fe7e0 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -333,12 +333,31 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->fall_through = p->fall_through; cp->tp = tp; + if (tb[TCA_TCINDEX_HASH]) + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + + if (tb[TCA_TCINDEX_MASK]) + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + + if (tb[TCA_TCINDEX_SHIFT]) + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + + if (!cp->hash) { + /* Hash not specified, use perfect hash if the upper limit + * of the hashing index is below the threshold. + */ + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; + else + cp->hash = DEFAULT_HASH_SIZE; + } + if (p->perfect) { int i; if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; - for (i = 0; i < cp->hash; i++) + for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; } @@ -346,19 +365,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result, net); if (err < 0) - goto errout1; + goto errout_alloc; if (old_r) cr = r->res; - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - err = -EBUSY; /* Hash already allocated, make sure that we still meet the @@ -376,16 +386,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_FALL_THROUGH]) cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. 
- */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - if (!cp->perfect && !cp->h) cp->alloc_hash = cp->hash; @@ -484,7 +484,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); -errout1: tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); @@ -654,12 +653,17 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; } -static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl) +static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl, + void *q, unsigned long base) { struct tcindex_filter_result *r = fh; - if (r && r->res.classid == classid) - r->res.class = cl; + if (r && r->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &r->res, base); + else + __tcf_unbind_filter(q, &r->res); + } } static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a0e6fac613de..e15ff335953d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1255,12 +1255,17 @@ static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } -static void u32_bind_class(void *fh, u32 classid, unsigned long cl) +static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct tc_u_knode *n = fh; - if (n && n->res.classid == classid) - n->res.class = cl; + if (n && n->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &n->res, base); + else + __tcf_unbind_filter(q, &n->res); + } } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/ematch.c b/net/sched/ematch.c index d0140a92694a..dd3b8c11a2e0 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -238,6 +238,9 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout; if (em->ops->change) { + err = -EINVAL; + if (em_hdr->flags & TCF_EM_SIMPLE) + goto errout; err = em->ops->change(net, data, data_len, em); if (err < 0) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1047825d9f48..50794125bf02 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1891,8 +1891,9 @@ static int tclass_del_notify(struct net *net, struct tcf_bind_args { struct tcf_walker w; - u32 classid; + unsigned long base; unsigned long cl; + u32 classid; }; static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) @@ -1903,28 +1904,30 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) struct Qdisc *q = tcf_block_q(tp->chain->block); sch_tree_lock(q); - tp->ops->bind_class(n, a->classid, a->cl); + tp->ops->bind_class(n, a->classid, a->cl, q, a->base); sch_tree_unlock(q); } return 0; } -static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, - unsigned long new_cl) +struct tc_bind_class_args { + struct qdisc_walker w; + unsigned long new_cl; + u32 portid; + u32 clid; +}; + +static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *w) { + struct tc_bind_class_args *a = (struct tc_bind_class_args *)w; const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct tcf_block *block; struct tcf_chain *chain; - unsigned long cl; - cl = cops->find(q, portid); - if (!cl) - return; - if (!cops->tcf_block) - return; block = cops->tcf_block(q, cl, NULL); if (!block) - return; + return 0; for (chain = tcf_get_next_chain(block, NULL); chain; 
	     chain = tcf_get_next_chain(block, chain)) {
@@ -1935,11 +1938,29 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			struct tcf_bind_args arg = {};
 
			arg.w.fn = tcf_node_bind;
-			arg.classid = clid;
-			arg.cl = new_cl;
+			arg.classid = a->clid;
+			arg.base = cl;
+			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
 		}
 	}
+
+	return 0;
+}
+
+static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
+			   unsigned long new_cl)
+{
+	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+	struct tc_bind_class_args args = {};
+
+	if (!cops->tcf_block)
+		return;
+	args.portid = portid;
+	args.clid = clid;
+	args.new_cl = new_cl;
+	args.w.fn = tc_bind_class_walker;
+	q->ops->cl_ops->walk(q, &args.w);
 }
 
 #else
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a5a295477ecc..371ad84def3b 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -744,6 +744,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
 	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
+	[TCA_FQ_ORPHAN_MASK]		= { .type = NLA_U32 },
 	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
 	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
 };
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index c609373c8661..b1eb12d33b9a 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(taprio_list_lock);
 
 #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
 #define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
+#define TAPRIO_FLAGS_INVALID U32_MAX
 
 struct sched_entry {
 	struct list_head list;
@@ -563,8 +564,10 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
 		prio = skb->priority;
 		tc = netdev_get_prio_tc_map(dev, prio);
 
-		if (!(gate_mask & BIT(tc)))
+		if (!(gate_mask & BIT(tc))) {
+			skb = NULL;
 			continue;
+		}
 
 		len = qdisc_pkt_len(skb);
 		guard = ktime_add_ns(taprio_get_time(q),
@@ -574,13 +577,17 @@ static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
		 * guard band ...
		 */
 		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
-		    ktime_after(guard, entry->close_time))
+		    ktime_after(guard, entry->close_time)) {
+			skb = NULL;
 			continue;
+		}
 
 		/* ... and no budget. */
 		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
-		    atomic_sub_return(len, &entry->budget) < 0)
+		    atomic_sub_return(len, &entry->budget) < 0) {
+			skb = NULL;
 			continue;
+		}
 
 		skb = child->ops->dequeue(child);
 		if (unlikely(!skb))
@@ -766,6 +773,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
 	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
 	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
 	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+	[TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
+	[TCA_TAPRIO_ATTR_TXTIME_DELAY]               = { .type = NLA_U32 },
 };
 
 static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
@@ -1367,6 +1376,33 @@ static int taprio_mqprio_cmp(const struct net_device *dev,
 	return 0;
 }
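taprio_new_flags(), added just below, treats an absent attribute as zero and refuses to change the flags of an already-configured instance. The same contract in a standalone sketch (the nlattr is replaced by a plain pointer, and flags_valid() is assumed defined elsewhere):

	#include <errno.h>
	#include <stdint.h>
	#include <stdbool.h>

	#define FLAGS_INVALID	UINT32_MAX	/* "never configured" sentinel */

	bool flags_valid(uint32_t f);	/* policy check, defined elsewhere */

	static int new_flags(const uint32_t *attr, uint32_t old)
	{
		uint32_t new = attr ? *attr : 0;	/* absent attribute == 0 */

		/* Flags of a running instance may not change. */
		if (old != FLAGS_INVALID && old != new)
			return -EOPNOTSUPP;
		if (!flags_valid(new))
			return -EINVAL;
		return (int)new;	/* valid flag values are small and positive */
	}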
+/* The semantics of the 'flags' argument in relation to 'change()'
+ * requests are interpreted following two rules (which are applied in
+ * this order): (1) an omitted 'flags' argument is interpreted as
+ * zero; (2) the 'flags' of a "running" taprio instance cannot be
+ * changed.
+ */
+static int taprio_new_flags(const struct nlattr *attr, u32 old,
+			    struct netlink_ext_ack *extack)
+{
+	u32 new = 0;
+
+	if (attr)
+		new = nla_get_u32(attr);
+
+	if (old != TAPRIO_FLAGS_INVALID && old != new) {
+		NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (!taprio_flags_valid(new)) {
+		NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
+		return -EINVAL;
+	}
+
+	return new;
+}
+
 static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
 {
@@ -1375,7 +1411,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mqprio_qopt *mqprio = NULL;
-	u32 taprio_flags = 0;
 	unsigned long flags;
 	ktime_t start;
 	int i, err;
@@ -1388,21 +1423,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
 		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
 
-	if (tb[TCA_TAPRIO_ATTR_FLAGS]) {
-		taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]);
-
-		if (q->flags != 0 && q->flags != taprio_flags) {
-			NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
-			return -EOPNOTSUPP;
-		} else if (!taprio_flags_valid(taprio_flags)) {
-			NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
-			return -EINVAL;
-		}
+	err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
+			       q->flags, extack);
+	if (err < 0)
+		return err;
 
-		q->flags = taprio_flags;
-	}
+	q->flags = err;
 
-	err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags);
+	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
 	if (err < 0)
 		return err;
 
@@ -1444,7 +1472,20 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 
 	taprio_set_picos_per_byte(dev, q);
 
-	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+	if (mqprio) {
+		netdev_set_num_tc(dev, mqprio->num_tc);
+		for (i = 0; i < mqprio->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    mqprio->count[i],
+					    mqprio->offset[i]);
+
+		/* Always use supplied priority mappings */
+		for (i = 0; i <= TC_BITMASK; i++)
+			netdev_set_prio_tc_map(dev, i,
+					       mqprio->prio_tc_map[i]);
+	}
+
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
 		err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
 	else
 		err = taprio_disable_offload(dev, q, extack);
@@ -1464,27 +1505,14 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
			q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
 	}
 
-	if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
-	    !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
+	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
+	    !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    !hrtimer_active(&q->advance_timer)) {
 		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
 		q->advance_timer.function = advance_sched;
 	}
 
-	if (mqprio) {
-		netdev_set_num_tc(dev, mqprio->num_tc);
-		for (i = 0; i < mqprio->num_tc; i++)
-			netdev_set_tc_queue(dev, i,
-					    mqprio->count[i],
-					    mqprio->offset[i]);
-
-		/* Always use supplied priority mappings */
-		for (i = 0; i <= TC_BITMASK; i++)
-			netdev_set_prio_tc_map(dev, i,
-					       mqprio->prio_tc_map[i]);
-	}
-
-	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
 		q->dequeue = taprio_dequeue_offload;
 		q->peek = taprio_peek_offload;
 	} else {
@@ -1501,9 +1529,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		goto unlock;
 	}
 
-	if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) {
-		setup_txtime(q, new_admin, start);
+	setup_txtime(q, new_admin, start);
+
if (TXTIME_ASSIST_IS_ENABLED(q->flags)) { if (!oper) { rcu_assign_pointer(q->oper_sched, new_admin); err = 0; @@ -1528,7 +1556,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, spin_unlock_irqrestore(&q->current_entry_lock, flags); - if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) taprio_offload_config_changed(q); } @@ -1567,7 +1595,7 @@ static void taprio_destroy(struct Qdisc *sch) } q->qdiscs = NULL; - netdev_set_num_tc(dev, 0); + netdev_reset_tc(dev); if (q->oper_sched) call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); @@ -1597,6 +1625,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, * and get the valid one on taprio_change(). */ q->clockid = -1; + q->flags = TAPRIO_FLAGS_INVALID; spin_lock(&taprio_list_lock); list_add(&q->taprio_list, &taprio_list); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 8a15146faaeb..1069d7af3672 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -237,15 +237,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 4ab8208a2dd4..c6d83a64eac3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -170,6 +170,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ @@ -2255,6 +2265,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2298,6 +2311,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. 
*/ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2565,6 +2581,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2582,16 +2601,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index cee5bf4a9bb9..90988a511cd5 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -470,6 +470,8 @@ static void smc_switch_to_fallback(struct smc_sock *smc) if (smc->sk.sk_socket && smc->sk.sk_socket->file) { smc->clcsock->file = smc->sk.sk_socket->file; smc->clcsock->file->private_data = smc->clcsock; + smc->clcsock->wq.fasync_list = + smc->sk.sk_socket->wq.fasync_list; } } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 0879f7bed967..86cccc24e52e 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -372,7 +372,9 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = SMC_CLC_V1; dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 
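
Note: sctp_err_chunk_valid() above relies on sctp_walk_errors() leaving its cursor exactly at chunk_end when every error cause is well-formed; any truncated or overrunning TLV makes the cursor miss the end, and the chunk is discarded. A self-contained model of that walk, with types and byte order simplified (the kernel operates on network-order sctp_errhdr TLVs):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct errhdr { uint16_t cause; uint16_t length; }; /* length incl. header */

static bool err_chunk_valid(const uint8_t *buf, size_t size)
{
	size_t off = 0;

	/* walk TLV error causes, each padded to a 4-byte boundary */
	while (off + sizeof(struct errhdr) <= size) {
		const struct errhdr *err = (const void *)(buf + off);
		size_t len = err->length;	/* ntohs() elided here */

		if (len < sizeof(struct errhdr))
			return false;		/* malformed length */
		off += (len + 3) & ~(size_t)3;
	}
	return off == size;	/* must land exactly on the chunk end */
}
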
1 : 0; - memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); + if (smc->conn.lgr && !smc->conn.lgr->is_smcd) + memcpy(dclc.id_for_peer, local_systemid, + sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index f38727ecf8b2..e1f64f4ba236 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -39,16 +39,15 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; + sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); if (sk->sk_protocol == SMCPROTO_SMC) { - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 548632621f4b..05b825b3cfa4 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -573,6 +573,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) struct smc_ib_device *smcibdev; smcibdev = ib_get_client_data(ibdev, &smc_ib_client); + if (!smcibdev || smcibdev->ibdev != ibdev) + return; ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ @@ -580,6 +582,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) smc_smcr_terminate_all(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); + cancel_work_sync(&smcibdev->port_event_work); kfree(smcibdev); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index c62d1f10978b..cff77f096647 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1248,6 +1248,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, dprintk("RPC: No creds found!\n"); goto out; } else { + struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; @@ -1268,6 +1269,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, &expiry, GFP_KERNEL); if (status) goto out; + + getboottime64(&boot); + expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f740cb51802a..7ede1e52fd81 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1888,7 +1888,9 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) if (!hlist_unhashed(&h->cache_list)){ hlist_del_init_rcu(&h->cache_list); cd->entries--; + set_bit(CACHE_CLEANED, &h->flags); spin_unlock(&cd->hash_lock); + cache_fresh_unlocked(h, cd); cache_put(h, cd); } else spin_unlock(&cd->hash_lock); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 523722be6a16..45366570ea65 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -298,8 +298,8 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, { struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct ib_reg_wr *reg_wr; + int i, n, dma_nents; struct ib_mr *ibmr; - int i, n; u8 key; if (nsegs > ia->ri_max_frwr_depth) @@ -323,15 +323,16 @@ struct 
rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, break; } mr->mr_dir = rpcrdma_data_dir(writing); + mr->mr_nents = i; - mr->mr_nents = - ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, i, mr->mr_dir); - if (!mr->mr_nents) + dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents, + mr->mr_dir); + if (!dma_nents) goto out_dmamap_err; ibmr = mr->frwr.fr_mr; - n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE); - if (unlikely(n != mr->mr_nents)) + n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE); + if (n != dma_nents) goto out_mapmr_err; ibmr->iova &= 0x00000000ffffffff; diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index e53231bd23b4..85f957689d32 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -115,6 +115,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 }, [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 }, [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }, + [TIPC_NLA_PROP_MTU] = { .type = NLA_U32 }, [TIPC_NLA_PROP_BROADCAST] = { .type = NLA_U32 }, [TIPC_NLA_PROP_BROADCAST_RATIO] = { .type = NLA_U32 } }; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9b4fb92c0b1..693e8902161e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2441,6 +2441,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return -ETIMEDOUT; if (signal_pending(current)) return sock_intr_errno(*timeo_p); + if (sk->sk_state == TIPC_DISCONNECTING) + break; add_wait_queue(sk_sleep(sk), &wait); done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk), diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index cd91ad812291..e72d7d787935 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -592,7 +592,7 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn) { u64 record_sn = context->hint_record_sn; - struct tls_record_info *info; + struct tls_record_info *info, *last; info = context->retransmit_hint; if (!info || @@ -604,6 +604,24 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, struct tls_record_info, list); if (!info) return NULL; + /* send the start_marker record if seq number is before the + * tls offload start marker sequence number. This record is + * required to handle TCP packets which are before TLS offload + * started. + * And if it's not start marker, look if this seq number + * belongs to the list. + */ + if (likely(!tls_record_is_start_marker(info))) { + /* we have the first record, get the last record to see + * if this seq number belongs to the list. 
+ */ + last = list_last_entry(&context->records_list, + struct tls_record_info, list); + + if (!between(seq, tls_record_start_seq(info), + last->end_seq)) + return NULL; + } record_sn = context->unacked_record_sn; } diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index a9c0f368db5d..24e18405cdb4 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -7,9 +7,13 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct device *pdev = wiphy_dev(wdev->wiphy); - strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, - sizeof(info->driver)); + if (pdev->driver) + strlcpy(info->driver, pdev->driver->name, + sizeof(info->driver)); + else + strlcpy(info->driver, "N/A", sizeof(info->driver)); strlcpy(info->version, init_utsname()->release, sizeof(info->version)); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1e97ac5435b2..c7e30f5818d6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -437,6 +437,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, @@ -468,6 +469,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1), + [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, + [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_MESH_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, @@ -529,6 +532,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, + [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, @@ -559,6 +564,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1), [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN @@ -4799,8 +4805,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_he_obss_pd( info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms.he_obss_pd); - if (err) - return err; + goto out; } nl80211_calculate_ap_params(¶ms); @@ -4822,6 +4827,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } wdev_unlock(wdev); +out: kfree(params.acl); return err; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index fff9a74891fc..1a8218f1bbe0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2276,7 +2276,7 @@ static void handle_channel_custom(struct wiphy *wiphy, break; } - if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits 
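
Note: the between() test used by tls_get_record() above has to cope with TCP sequence numbers wrapping at 2^32, so "start <= seq <= end" is computed in modular arithmetic rather than with plain comparisons. The conventional definition, shown stand-alone:

#include <stdbool.h>
#include <stdint.h>

/* true iff start <= seq <= end in mod-2^32 arithmetic */
static bool between(uint32_t seq, uint32_t start, uint32_t end)
{
	return end - start >= seq - start;
}

/* usage mirrors the hunk above:
 *   between(seq, tls_record_start_seq(info), last->end_seq)
 */
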
it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1fc42ad8ff49..06ebe3104cc0 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -38,6 +38,8 @@ tprogs-y += tc_l2_redirect tprogs-y += lwt_len_hist tprogs-y += xdp_tx_iptunnel tprogs-y += test_map_in_map +tprogs-y += per_socket_stats_example +tprogs-y += xdp_redirect tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect_cpu tprogs-y += xdp_monitor @@ -234,6 +236,7 @@ BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ readelf -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) +BPF_EXTRA_CFLAGS += -fno-stack-protector ifneq ($(BTF_LLVM_PROBE),) BPF_EXTRA_CFLAGS += -g else @@ -251,7 +254,7 @@ all: clean: $(MAKE) -C ../../ M=$(CURDIR) clean - @rm -f *~ + @find $(CURDIR) -type f -name '*~' -delete $(LIBBPF): FORCE # Fix up variables inherited from Kbuild that tools/ build system won't like diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 0da6e9e7132e..8b862a7a6c6a 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -16,6 +16,10 @@ static const char *__doc__ = #include #include #include +#include + +#define __must_check +#include #include #include @@ -46,6 +50,10 @@ static int cpus_count_map_fd; static int cpus_iterator_map_fd; static int exception_cnt_map_fd; +#define NUM_TP 5 +struct bpf_link *tp_links[NUM_TP] = { 0 }; +static int tp_cnt = 0; + /* Exit return codes */ #define EXIT_OK 0 #define EXIT_FAIL 1 @@ -88,6 +96,10 @@ static void int_exit(int sig) printf("program on interface changed, not removing\n"); } } + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + exit(EXIT_OK); } @@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name, free_stats_record(prev); } +static struct bpf_link * attach_tp(struct bpf_object *obj, + const char *tp_category, + const char* tp_name) +{ + struct bpf_program *prog; + struct bpf_link *link; + char sec_name[PATH_MAX]; + int len; + + len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s", + tp_category, tp_name); + if (len < 0) + exit(EXIT_FAIL); + + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + fprintf(stderr, "ERR: finding progsec: %s\n", sec_name); + exit(EXIT_FAIL_BPF); + } + + link = bpf_program__attach_tracepoint(prog, tp_category, tp_name); + if (IS_ERR(link)) + exit(EXIT_FAIL_BPF); + + return link; +} + +static void init_tracepoints(struct bpf_object *obj) { + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread"); +} + static int init_map_fds(struct bpf_object *obj) { - cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); - rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + /* Maps updated by tracepoints */ redirect_err_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); + exception_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "exception_cnt"); cpumap_enqueue_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); cpumap_kthread_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); + + /* Maps used by XDP */ + rx_cnt_map_fd 
= bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); cpus_available_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_available"); cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); cpus_iterator_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); - exception_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "exception_cnt"); if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || @@ -662,6 +712,7 @@ int main(int argc, char **argv) strerror(errno)); return EXIT_FAIL; } + init_tracepoints(obj); if (init_map_fds(obj) < 0) { fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); return EXIT_FAIL; diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index bc5f25763c1b..f3155af04d85 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -55,14 +55,13 @@ kecho := $($(quiet)kecho) # - stdin is piped in from the first prerequisite ($<) so one has # to specify a valid file as first prerequisite (often the kbuild file) define filechk - $(Q)set -e; \ - mkdir -p $(dir $@); \ - { $(filechk_$(1)); } > $@.tmp; \ - if [ -r $@ ] && cmp -s $@ $@.tmp; then \ - rm -f $@.tmp; \ - else \ - $(kecho) ' UPD $@'; \ - mv -f $@.tmp $@; \ + $(Q)set -e; \ + mkdir -p $(dir $@); \ + trap "rm -f $(dot-target).tmp" EXIT; \ + { $(filechk_$(1)); } > $(dot-target).tmp; \ + if [ ! -r $@ ] || ! cmp -s $@ $(dot-target).tmp; then \ + $(kecho) ' UPD $@'; \ + mv -f $(dot-target).tmp $@; \ fi endef diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index d4adfbe42690..77a69ba9cd19 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -25,7 +25,7 @@ failure = $(if-success,$(1),n,y) # $(cc-option,) # Return y if the compiler supports , n otherwise -cc-option = $(success,$(CC) -Werror $(CLANG_FLAGS) $(1) -E -x c /dev/null -o /dev/null) +cc-option = $(success,$(CC) -Werror $(CLANG_FLAGS) $(1) -S -x c /dev/null -o /dev/null) # $(ld-option,) # Return y if the linker supports , n otherwise @@ -40,3 +40,10 @@ $(error-if,$(success, $(LD) -v | grep -q gold), gold linker '$(LD)' not supporte # gcc version including patch level gcc-version := $(shell,$(srctree)/scripts/gcc-version.sh $(CC)) + +# machine bit flags +# $(m32-flag): -m32 if the compiler supports it, or an empty string otherwise. +# $(m64-flag): -m64 if the compiler supports it, or an empty string otherwise. 
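
Note: init_tracepoints() and int_exit() in the xdp_redirect_cpu_user.c hunks above pair every successful bpf_program__attach_tracepoint() with a bpf_link__destroy() in reverse order on exit. A condensed sketch of that attach-and-teardown pattern; the libbpf calls are the ones the sample itself uses (newer libbpf versions have since renamed or removed some of them):

#include <stdio.h>
#include <stdlib.h>
#include <bpf/libbpf.h>

#define MAX_LINKS 8
static struct bpf_link *links[MAX_LINKS];
static int n_links;

static void detach_all(void)
{
	while (n_links)
		bpf_link__destroy(links[--n_links]);	/* reverse order */
}

static void attach_one(struct bpf_object *obj,
		       const char *cat, const char *name)
{
	char sec[256];
	struct bpf_program *prog;
	struct bpf_link *link;

	if (n_links == MAX_LINKS) {
		detach_all();
		exit(1);
	}
	snprintf(sec, sizeof(sec), "tracepoint/%s/%s", cat, name);
	prog = bpf_object__find_program_by_title(obj, sec);
	if (!prog) {
		fprintf(stderr, "no program in section %s\n", sec);
		exit(1);
	}
	link = bpf_program__attach_tracepoint(prog, cat, name);
	if (libbpf_get_error(link)) {	/* this libbpf returns ERR_PTR */
		detach_all();
		exit(1);
	}
	links[n_links++] = link;
}
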
+cc-option-bit = $(if-success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null,$(1)) +m32-flag := $(cc-option-bit,-m32) +m64-flag := $(cc-option-bit,-m64) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index ecddf83ac142..ca08f2fe7c34 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -Wno-initializer-overrides KBUILD_CFLAGS += -Wno-format KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-format-zero-length +KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) endif endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3fa32f83b2d7..a66fc0acad1e 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -291,13 +291,13 @@ DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.yaml quiet_cmd_dtb_check = CHECK $@ cmd_dtb_check = $(DT_CHECKER) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ ; -define rule_dtc_dt_yaml +define rule_dtc $(call cmd_and_fixdep,dtc,yaml) $(call cmd,dtb_check) endef $(obj)/%.dt.yaml: $(src)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_rule,dtc_dt_yaml) + $(call if_changed_rule,dtc) dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp) diff --git a/scripts/export_report.pl b/scripts/export_report.pl index 548330e8c4e7..feb3d5542a62 100755 --- a/scripts/export_report.pl +++ b/scripts/export_report.pl @@ -94,7 +94,7 @@ sub collectcfiles { # while ( <$module_symvers> ) { chomp; - my (undef, $symbol, $namespace, $module, $gpl) = split('\t'); + my (undef, $symbol, $module, $gpl, $namespace) = split('\t'); $SYMBOL { $symbol } = [ $module , "0" , $symbol, $gpl]; } close($module_symvers); diff --git a/scripts/find-unused-docs.sh b/scripts/find-unused-docs.sh index 3f46f8977dc4..ee6a50e33aba 100755 --- a/scripts/find-unused-docs.sh +++ b/scripts/find-unused-docs.sh @@ -54,7 +54,7 @@ for file in `find $1 -name '*.c'`; do if [[ ${FILES_INCLUDED[$file]+_} ]]; then continue; fi - str=$(scripts/kernel-doc -text -export "$file" 2>/dev/null) + str=$(scripts/kernel-doc -export "$file" 2>/dev/null) if [[ -n "$str" ]]; then echo "$file" fi diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 34085d146fa2..7a228681f89f 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -932,10 +932,6 @@ sub get_maintainers { } } - foreach my $fix (@fixes) { - vcs_add_commit_signers($fix, "blamed_fixes"); - } - foreach my $email (@email_to, @list_to) { $email->[0] = deduplicate_email($email->[0]); } @@ -974,6 +970,10 @@ sub get_maintainers { } } + foreach my $fix (@fixes) { + vcs_add_commit_signers($fix, "blamed_fixes"); + } + my @to = (); if ($email || $email_list) { if ($email) { diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 3569d2dec37c..17298239e363 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1353,7 +1353,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode) sym_calc_value(csym); if (mode == def_random) - has_changed = randomize_choice_values(csym); + has_changed |= randomize_choice_values(csym); else { set_all_choice_values(csym); has_changed = true; diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 436379940356..408b5c0b99b1 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -108,13 +108,13 @@ gen_btf() local bin_arch if ! 
[ -x "$(command -v ${PAHOLE})" ]; then - info "BTF" "${1}: pahole (${PAHOLE}) is not available" + echo >&2 "BTF: ${1}: pahole (${PAHOLE}) is not available" return 1 fi pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') if [ "${pahole_ver}" -lt "113" ]; then - info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" + echo >&2 "BTF: ${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" return 1 fi diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6e892c93d104..a06e9e5c1419 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -307,7 +307,8 @@ static const char *sec_name(struct elf_info *elf, int secindex) static void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym) { - Elf_Shdr *sechdr = &info->sechdrs[sym->st_shndx]; + unsigned int secindex = get_secindex(info, sym); + Elf_Shdr *sechdr = &info->sechdrs[secindex]; unsigned long offset; offset = sym->st_value; @@ -2433,7 +2434,7 @@ static void write_if_changed(struct buffer *b, const char *fname) } /* parse Module.symvers file. line format: - * 0x12345678symbolmodule[[export]something] + * 0x12345678symbolmoduleexportnamespace **/ static void read_dump(const char *fname, unsigned int kernel) { @@ -2446,7 +2447,7 @@ static void read_dump(const char *fname, unsigned int kernel) return; while ((line = get_next_line(&pos, file, size))) { - char *symname, *namespace, *modname, *d, *export, *end; + char *symname, *namespace, *modname, *d, *export; unsigned int crc; struct module *mod; struct symbol *s; @@ -2454,16 +2455,16 @@ static void read_dump(const char *fname, unsigned int kernel) if (!(symname = strchr(line, '\t'))) goto fail; *symname++ = '\0'; - if (!(namespace = strchr(symname, '\t'))) - goto fail; - *namespace++ = '\0'; - if (!(modname = strchr(namespace, '\t'))) + if (!(modname = strchr(symname, '\t'))) goto fail; *modname++ = '\0'; - if ((export = strchr(modname, '\t')) != NULL) - *export++ = '\0'; - if (export && ((end = strchr(export, '\t')) != NULL)) - *end = '\0'; + if (!(export = strchr(modname, '\t'))) + goto fail; + *export++ = '\0'; + if (!(namespace = strchr(export, '\t'))) + goto fail; + *namespace++ = '\0'; + crc = strtoul(line, &d, 16); if (*symname == '\0' || *modname == '\0' || *d != '\0') goto fail; @@ -2514,9 +2515,9 @@ static void write_dump(const char *fname) namespace = symbol->namespace; buf_printf(&buf, "0x%08x\t%s\t%s\t%s\t%s\n", symbol->crc, symbol->name, - namespace ? namespace : "", symbol->module->name, - export_str(symbol->export)); + export_str(symbol->export), + namespace ? namespace : ""); } symbol = symbol->next; } diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl old mode 100644 new mode 100755 diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index d7e987baf127..9b35db2fc777 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -655,6 +655,9 @@ void process_buffer_measurement(const void *buf, int size, int action = 0; u32 secid; + if (!ima_policy_flag) + return; + /* * Both LSM hooks and auxilary based buffer measurements are * based on policy. 
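
Note: the read_dump() rework above expects Module.symvers columns in the order crc, symbol, module, export, namespace, each separated by a single tab (write_dump() now emits the same order). A standalone parser for one such line, mirroring the in-place strchr() splitting, with error handling condensed:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Splits "crc\tsymbol\tmodule\texport\tnamespace" in place. */
static int parse_symver_line(char *line)
{
	char *symname, *modname, *export, *namespace, *end;
	unsigned int crc;

	if (!(symname = strchr(line, '\t')))
		return -1;
	*symname++ = '\0';
	if (!(modname = strchr(symname, '\t')))
		return -1;
	*modname++ = '\0';
	if (!(export = strchr(modname, '\t')))
		return -1;
	*export++ = '\0';
	if (!(namespace = strchr(export, '\t')))
		return -1;
	*namespace++ = '\0';

	crc = strtoul(line, &end, 16);
	if (*symname == '\0' || *modname == '\0' || *end != '\0')
		return -1;
	printf("crc=0x%08x sym=%s mod=%s export=%s ns=%s\n",
	       crc, symname, modname, export, namespace);
	return 0;
}
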
To avoid code duplication, differentiate diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index ef8dfd47c7e3..0cac399ce713 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -263,7 +263,7 @@ static void ima_lsm_free_rule(struct ima_rule_entry *entry) static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) { struct ima_rule_entry *nentry; - int i, result; + int i; nentry = kmalloc(sizeof(*nentry), GFP_KERNEL); if (!nentry) @@ -277,7 +277,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) memset(nentry->lsm, 0, sizeof_field(struct ima_rule_entry, lsm)); for (i = 0; i < MAX_LSM_RULES; i++) { - if (!entry->lsm[i].rule) + if (!entry->lsm[i].args_p) continue; nentry->lsm[i].type = entry->lsm[i].type; @@ -286,13 +286,13 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) if (!nentry->lsm[i].args_p) goto out_err; - result = security_filter_rule_init(nentry->lsm[i].type, - Audit_equal, - nentry->lsm[i].args_p, - &nentry->lsm[i].rule); - if (result == -EINVAL) - pr_warn("ima: rule for LSM \'%d\' is undefined\n", - entry->lsm[i].type); + security_filter_rule_init(nentry->lsm[i].type, + Audit_equal, + nentry->lsm[i].args_p, + &nentry->lsm[i].rule); + if (!nentry->lsm[i].rule) + pr_warn("rule for LSM \'%s\' is undefined\n", + (char *)entry->lsm[i].args_p); } return nentry; @@ -329,7 +329,7 @@ static void ima_lsm_update_rules(void) list_for_each_entry_safe(entry, e, &ima_policy_rules, list) { needs_update = 0; for (i = 0; i < MAX_LSM_RULES; i++) { - if (entry->lsm[i].rule) { + if (entry->lsm[i].args_p) { needs_update = 1; break; } @@ -339,8 +339,7 @@ static void ima_lsm_update_rules(void) result = ima_lsm_update_rule(entry); if (result) { - pr_err("ima: lsm rule update error %d\n", - result); + pr_err("lsm rule update error %d\n", result); return; } } @@ -357,7 +356,7 @@ int ima_lsm_policy_change(struct notifier_block *nb, unsigned long event, } /** - * ima_match_rules - determine whether an inode matches the measure rule. + * ima_match_rules - determine whether an inode matches the policy rule. * @rule: a pointer to a rule * @inode: a pointer to an inode * @cred: a pointer to a credentials structure for user validation @@ -415,9 +414,12 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode, int rc = 0; u32 osid; - if (!rule->lsm[i].rule) - continue; - + if (!rule->lsm[i].rule) { + if (!rule->lsm[i].args_p) + continue; + else + return false; + } switch (i) { case LSM_OBJ_USER: case LSM_OBJ_ROLE: @@ -823,8 +825,14 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, entry->lsm[lsm_rule].args_p, &entry->lsm[lsm_rule].rule); if (!entry->lsm[lsm_rule].rule) { - kfree(entry->lsm[lsm_rule].args_p); - return -EINVAL; + pr_warn("rule for LSM \'%s\' is undefined\n", + (char *)entry->lsm[lsm_rule].args_p); + + if (ima_rules == &ima_default_rules) { + kfree(entry->lsm[lsm_rule].args_p); + result = -EINVAL; + } else + result = 0; } return result; diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index 111898aad56e..f0c908241966 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -35,16 +35,18 @@ static __init bool uefi_check_ignore_db(void) * Get a certificate list blob from the named EFI variable. 
*/ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, - unsigned long *size) + unsigned long *size, efi_status_t *status) { - efi_status_t status; unsigned long lsize = 4; unsigned long tmpdb[4]; void *db; - status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); - if (status != EFI_BUFFER_TOO_SMALL) { - pr_err("Couldn't get size: 0x%lx\n", status); + *status = efi.get_variable(name, guid, NULL, &lsize, &tmpdb); + if (*status == EFI_NOT_FOUND) + return NULL; + + if (*status != EFI_BUFFER_TOO_SMALL) { + pr_err("Couldn't get size: 0x%lx\n", *status); return NULL; } @@ -52,10 +54,10 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, if (!db) return NULL; - status = efi.get_variable(name, guid, NULL, &lsize, db); - if (status != EFI_SUCCESS) { + *status = efi.get_variable(name, guid, NULL, &lsize, db); + if (*status != EFI_SUCCESS) { kfree(db); - pr_err("Error reading db var: 0x%lx\n", status); + pr_err("Error reading db var: 0x%lx\n", *status); return NULL; } @@ -74,6 +76,7 @@ static int __init load_uefi_certs(void) efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; void *db = NULL, *dbx = NULL, *mok = NULL; unsigned long dbsize = 0, dbxsize = 0, moksize = 0; + efi_status_t status; int rc = 0; if (!efi.get_variable) @@ -83,9 +86,12 @@ static int __init load_uefi_certs(void) * an error if we can't get them. */ if (!uefi_check_ignore_db()) { - db = get_cert_list(L"db", &secure_var, &dbsize); + db = get_cert_list(L"db", &secure_var, &dbsize, &status); if (!db) { - pr_err("MODSIGN: Couldn't get UEFI db list\n"); + if (status == EFI_NOT_FOUND) + pr_debug("MODSIGN: db variable wasn't found\n"); + else + pr_err("MODSIGN: Couldn't get UEFI db list\n"); } else { rc = parse_efi_signature_list("UEFI:db", db, dbsize, get_handler_for_db); @@ -96,9 +102,12 @@ static int __init load_uefi_certs(void) } } - mok = get_cert_list(L"MokListRT", &mok_var, &moksize); + mok = get_cert_list(L"MokListRT", &mok_var, &moksize, &status); if (!mok) { - pr_info("Couldn't get UEFI MokListRT\n"); + if (status == EFI_NOT_FOUND) + pr_debug("MokListRT variable wasn't found\n"); + else + pr_info("Couldn't get UEFI MokListRT\n"); } else { rc = parse_efi_signature_list("UEFI:MokListRT", mok, moksize, get_handler_for_db); @@ -107,9 +116,12 @@ static int __init load_uefi_certs(void) kfree(mok); } - dbx = get_cert_list(L"dbx", &secure_var, &dbxsize); + dbx = get_cert_list(L"dbx", &secure_var, &dbxsize, &status); if (!dbx) { - pr_info("Couldn't get UEFI dbx list\n"); + if (status == EFI_NOT_FOUND) + pr_debug("dbx variable wasn't found\n"); + else + pr_info("Couldn't get UEFI dbx list\n"); } else { rc = parse_efi_signature_list("UEFI:dbx", dbx, dbxsize, diff --git a/security/selinux/avc.c b/security/selinux/avc.c index ecd3829996aa..d18cb32a242a 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -424,7 +424,7 @@ static inline int avc_xperms_audit(struct selinux_state *state, if (likely(!audited)) return 0; return slow_avc_audit(state, ssid, tsid, tclass, requested, - audited, denied, result, ad, 0); + audited, denied, result, ad); } static void avc_node_free(struct rcu_head *rhead) @@ -617,40 +617,37 @@ static struct avc_node *avc_insert(struct selinux_avc *avc, struct avc_node *pos, *node = NULL; int hvalue; unsigned long flag; + spinlock_t *lock; + struct hlist_head *head; if (avc_latest_notif_update(avc, avd->seqno, 1)) - goto out; + return NULL; node = avc_alloc_node(avc); - if (node) { - struct hlist_head *head; - spinlock_t *lock; - int rc = 0; - - hvalue = avc_hash(ssid, 
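
Note: get_cert_list() above uses the standard two-call EFI variable pattern: probe with an undersized buffer, expect EFI_BUFFER_TOO_SMALL with the required length filled in, then allocate and read for real. The new status out-parameter lets callers distinguish EFI_NOT_FOUND (variable absent, a debug message) from genuine read errors. A userspace model of the pattern; get_variable() here is a stand-in stub, not the EFI runtime API:

#include <stdlib.h>

enum status { ST_OK, ST_NOT_FOUND, ST_BUFFER_TOO_SMALL, ST_ERROR };

/* stand-in backend; a real one would query firmware */
static enum status get_variable(const char *name, void *buf, size_t *len)
{
	(void)name; (void)buf; (void)len;
	return ST_NOT_FOUND;
}

static void *read_var(const char *name, size_t *size, enum status *status)
{
	size_t len = 0;
	void *buf;

	/* 1st call: size probe with a too-small buffer */
	*status = get_variable(name, NULL, &len);
	if (*status != ST_BUFFER_TOO_SMALL)
		return NULL;	/* ST_NOT_FOUND stays visible to the caller */

	buf = malloc(len);
	if (!buf)
		return NULL;

	/* 2nd call: the actual read, now with enough room */
	*status = get_variable(name, buf, &len);
	if (*status != ST_OK) {
		free(buf);
		return NULL;
	}
	*size = len;
	return buf;
}
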
tsid, tclass); - avc_node_populate(node, ssid, tsid, tclass, avd); - rc = avc_xperms_populate(node, xp_node); - if (rc) { - kmem_cache_free(avc_node_cachep, node); - return NULL; - } - head = &avc->avc_cache.slots[hvalue]; - lock = &avc->avc_cache.slots_lock[hvalue]; + if (!node) + return NULL; - spin_lock_irqsave(lock, flag); - hlist_for_each_entry(pos, head, list) { - if (pos->ae.ssid == ssid && - pos->ae.tsid == tsid && - pos->ae.tclass == tclass) { - avc_node_replace(avc, node, pos); - goto found; - } + avc_node_populate(node, ssid, tsid, tclass, avd); + if (avc_xperms_populate(node, xp_node)) { + avc_node_kill(avc, node); + return NULL; + } + + hvalue = avc_hash(ssid, tsid, tclass); + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; + spin_lock_irqsave(lock, flag); + hlist_for_each_entry(pos, head, list) { + if (pos->ae.ssid == ssid && + pos->ae.tsid == tsid && + pos->ae.tclass == tclass) { + avc_node_replace(avc, node, pos); + goto found; } - hlist_add_head_rcu(&node->list, head); -found: - spin_unlock_irqrestore(lock, flag); } -out: + hlist_add_head_rcu(&node->list, head); +found: + spin_unlock_irqrestore(lock, flag); return node; } @@ -758,8 +755,7 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) noinline int slow_avc_audit(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, u32 audited, u32 denied, int result, - struct common_audit_data *a, - unsigned int flags) + struct common_audit_data *a) { struct common_audit_data stack_data; struct selinux_audit_data sad; @@ -772,17 +768,6 @@ noinline int slow_avc_audit(struct selinux_state *state, a->type = LSM_AUDIT_DATA_NONE; } - /* - * When in a RCU walk do the audit on the RCU retry. This is because - * the collection of the dname in an inode audit message is not RCU - * safe. Note this may drop some audits when the situation changes - * during retry. However this is logically just as if the operation - * happened a little later. - */ - if ((a->type == LSM_AUDIT_DATA_INODE) && - (flags & MAY_NOT_BLOCK)) - return -ECHILD; - sad.tclass = tclass; sad.requested = requested; sad.ssid = ssid; @@ -855,15 +840,14 @@ static int avc_update_node(struct selinux_avc *avc, /* * If we are in a non-blocking code path, e.g. VFS RCU walk, * then we must not add permissions to a cache entry - * because we cannot safely audit the denial. Otherwise, + * because we will not audit the denial. Otherwise, * during the subsequent blocking retry (e.g. VFS ref walk), we * will find the permissions already granted in the cache entry * and won't audit anything at all, leading to silent denials in * permissive mode that only appear when in enforcing mode. * - * See the corresponding handling in slow_avc_audit(), and the - * logic in selinux_inode_permission for the MAY_NOT_BLOCK flag, - * which is transliterated into AVC_NONBLOCKING. + * See the corresponding handling of MAY_NOT_BLOCK in avc_audit() + * and selinux_inode_permission(). 
*/ if (flags & AVC_NONBLOCKING) return 0; @@ -907,7 +891,7 @@ static int avc_update_node(struct selinux_avc *avc, if (orig->ae.xp_node) { rc = avc_xperms_populate(node, orig->ae.xp_node); if (rc) { - kmem_cache_free(avc_node_cachep, node); + avc_node_kill(avc, node); goto out_unlock; } } @@ -1205,6 +1189,25 @@ int avc_has_perm(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, return rc; } +int avc_has_perm_flags(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + struct common_audit_data *auditdata, + int flags) +{ + struct av_decision avd; + int rc, rc2; + + rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, + (flags & MAY_NOT_BLOCK) ? AVC_NONBLOCKING : 0, + &avd); + + rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc, + auditdata, flags); + if (rc2) + return rc2; + return rc; +} + u32 avc_policy_seqno(struct selinux_state *state) { return state->avc->avc_cache.latest_notif; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 116b4d644f68..db44c7eb4321 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2762,6 +2762,14 @@ static int selinux_mount(const char *dev_name, return path_has_perm(cred, path, FILE__MOUNTON); } +static int selinux_move_mount(const struct path *from_path, + const struct path *to_path) +{ + const struct cred *cred = current_cred(); + + return path_has_perm(cred, to_path, FILE__MOUNTON); +} + static int selinux_umount(struct vfsmount *mnt, int flags) { const struct cred *cred = current_cred(); @@ -3004,14 +3012,14 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, if (IS_ERR(isec)) return PTR_ERR(isec); - return avc_has_perm(&selinux_state, - sid, isec->sid, isec->sclass, FILE__READ, &ad); + return avc_has_perm_flags(&selinux_state, + sid, isec->sid, isec->sclass, FILE__READ, &ad, + rcu ? 
MAY_NOT_BLOCK : 0); } static noinline int audit_inode_permission(struct inode *inode, u32 perms, u32 audited, u32 denied, - int result, - unsigned flags) + int result) { struct common_audit_data ad; struct inode_security_struct *isec = selinux_inode(inode); @@ -3022,7 +3030,7 @@ static noinline int audit_inode_permission(struct inode *inode, rc = slow_avc_audit(&selinux_state, current_sid(), isec->sid, isec->sclass, perms, - audited, denied, result, &ad, flags); + audited, denied, result, &ad); if (rc) return rc; return 0; @@ -3069,7 +3077,11 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (likely(!audited)) return rc; - rc2 = audit_inode_permission(inode, perms, audited, denied, rc, flags); + /* fall back to ref-walk if we have to generate audit */ + if (flags & MAY_NOT_BLOCK) + return -ECHILD; + + rc2 = audit_inode_permission(inode, perms, audited, denied, rc); if (rc2) return rc2; return rc; @@ -6903,6 +6915,8 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(sb_clone_mnt_opts, selinux_sb_clone_mnt_opts), LSM_HOOK_INIT(sb_add_mnt_opt, selinux_add_mnt_opt), + LSM_HOOK_INIT(move_mount, selinux_move_mount), + LSM_HOOK_INIT(dentry_init_security, selinux_dentry_init_security), LSM_HOOK_INIT(dentry_create_files_as, selinux_dentry_create_files_as), diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index 7be0e1e90e8b..cf4cc3ef959b 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -100,8 +100,7 @@ static inline u32 avc_audit_required(u32 requested, int slow_avc_audit(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, u32 audited, u32 denied, int result, - struct common_audit_data *a, - unsigned flags); + struct common_audit_data *a); /** * avc_audit - Audit the granting or denial of permissions. @@ -135,9 +134,12 @@ static inline int avc_audit(struct selinux_state *state, audited = avc_audit_required(requested, avd, result, 0, &denied); if (likely(!audited)) return 0; + /* fall back to ref-walk if we have to generate audit */ + if (flags & MAY_NOT_BLOCK) + return -ECHILD; return slow_avc_audit(state, ssid, tsid, tclass, requested, audited, denied, result, - a, flags); + a); } #define AVC_STRICT 1 /* Ignore permissive mode. 
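
Note: the recurring change across the SELinux hunks above is one rule: a non-blocking path (VFS RCU walk, MAY_NOT_BLOCK) must not generate an audit record, so the check returns -ECHILD and lets the VFS repeat it on the blocking ref-walk path, where auditing is safe. Schematically, as a userspace model rather than the kernel API:

#include <errno.h>
#include <stdbool.h>

/* decide + audit, honouring a non-blocking caller */
static int check_and_audit(bool allowed, bool audited, bool may_not_block)
{
	int rc = allowed ? 0 : -EACCES;

	if (!audited)
		return rc;
	if (may_not_block)
		return -ECHILD;	/* fall back to ref-walk, then audit */
	/* ... emit the audit record here, then ... */
	return rc;
}
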
*/ @@ -153,6 +155,11 @@ int avc_has_perm(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata); +int avc_has_perm_flags(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, u32 requested, + struct common_audit_data *auditdata, + int flags); int avc_has_extended_perms(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index ecea41ce919b..8bc7b04769a8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2831,42 +2831,39 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, int addrlen) { int rc = 0; -#if IS_ENABLED(CONFIG_IPV6) - struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; -#endif -#ifdef SMACK_IPV6_SECMARK_LABELING - struct smack_known *rsp; - struct socket_smack *ssp; -#endif if (sock->sk == NULL) return 0; - + if (sock->sk->sk_family != PF_INET && + (!IS_ENABLED(CONFIG_IPV6) || sock->sk->sk_family != PF_INET6)) + return 0; + if (addrlen < offsetofend(struct sockaddr, sa_family)) + return 0; + if (IS_ENABLED(CONFIG_IPV6) && sap->sa_family == AF_INET6) { + struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap; #ifdef SMACK_IPV6_SECMARK_LABELING - ssp = sock->sk->sk_security; + struct smack_known *rsp; #endif - switch (sock->sk->sk_family) { - case PF_INET: - if (addrlen < sizeof(struct sockaddr_in) || - sap->sa_family != AF_INET) - return -EINVAL; - rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); - break; - case PF_INET6: - if (addrlen < SIN6_LEN_RFC2133 || sap->sa_family != AF_INET6) - return -EINVAL; + if (addrlen < SIN6_LEN_RFC2133) + return 0; #ifdef SMACK_IPV6_SECMARK_LABELING rsp = smack_ipv6host_label(sip); - if (rsp != NULL) + if (rsp != NULL) { + struct socket_smack *ssp = sock->sk->sk_security; + rc = smk_ipv6_check(ssp->smk_out, rsp, sip, - SMK_CONNECTING); + SMK_CONNECTING); + } #endif #ifdef SMACK_IPV6_PORT_LABELING rc = smk_ipv6_port_check(sock->sk, sip, SMK_CONNECTING); #endif - break; + return rc; } + if (sap->sa_family != AF_INET || addrlen < sizeof(struct sockaddr_in)) + return 0; + rc = smack_netlabel_send(sock->sk, (struct sockaddr_in *)sap); return rc; } diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index c36bafbcd77e..1b467381986f 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -2322,9 +2322,9 @@ static const char * const tomoyo_memory_headers[TOMOYO_MAX_MEMORY_STAT] = { [TOMOYO_MEMORY_QUERY] = "query message:", }; -/* Timestamp counter for last updated. */ -static unsigned int tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT]; /* Counter for number of updates. */ +static atomic_t tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT]; +/* Timestamp counter for last updated. */ static time64_t tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT]; /** @@ -2336,10 +2336,7 @@ static time64_t tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT]; */ void tomoyo_update_stat(const u8 index) { - /* - * I don't use atomic operations because race condition is not fatal. 
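
Note: tomoyo_update_stat() above switches the per-category update counters from plain unsigned ints to atomic_t: concurrent increments of a plain int can be lost because the load/add/store sequence from two CPUs can interleave. The C11 equivalent of the fix:

#include <stdatomic.h>

static atomic_uint stat_updated;	/* was: unsigned int */

void update_stat(void)
{
	/* read-modify-write as one indivisible operation */
	atomic_fetch_add_explicit(&stat_updated, 1, memory_order_relaxed);
}

unsigned int read_stat(void)
{
	return atomic_load_explicit(&stat_updated, memory_order_relaxed);
}
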
- */ - tomoyo_stat_updated[index]++; + atomic_inc(&tomoyo_stat_updated[index]); tomoyo_stat_modified[index] = ktime_get_real_seconds(); } @@ -2360,7 +2357,7 @@ static void tomoyo_read_stat(struct tomoyo_io_buffer *head) for (i = 0; i < TOMOYO_MAX_POLICY_STAT; i++) { tomoyo_io_printf(head, "Policy %-30s %10u", tomoyo_policy_headers[i], - tomoyo_stat_updated[i]); + atomic_read(&tomoyo_stat_updated[i])); if (tomoyo_stat_modified[i]) { struct tomoyo_time stamp; diff --git a/sound/core/control.c b/sound/core/control.c index 7a4d8690ce41..08ca7666e84c 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1430,8 +1430,9 @@ static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, if (kctl->tlv.c == NULL) return -ENXIO; - /* When locked, this is unavailable. */ - if (vd->owner != NULL && vd->owner != file) + /* Write and command operations are not allowed for locked element. */ + if (op_flag != SNDRV_CTL_TLV_OP_READ && + vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 31cb2acf8afc..732bbede7ebf 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); @@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); @@ -209,6 +209,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p if (stream == SNDRV_PCM_STREAM_PLAYBACK) { plugin = snd_pcm_plug_last(plug); while (plugin && drv_frames > 0) { + if (drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) drv_frames = plugin->src_frames(plugin, drv_frames); @@ -220,6 +222,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p plugin_next = plugin->next; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); + if (drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin = plugin_next; } } else @@ -248,11 +252,15 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc if (frames < 0) return frames; } + if (frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin = plugin_next; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_last(plug); while (plugin) { + if (frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index d083225344a0..df40d38f6e29 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -785,10 +785,22 @@ static int snd_pcm_hw_params_user(struct snd_pcm_substream *substream, return err; } +static int do_hw_free(struct snd_pcm_substream *substream) +{ + int result = 0; + + snd_pcm_sync_stop(substream); + if 
(substream->ops->hw_free) + result = substream->ops->hw_free(substream); + if (substream->managed_buffer_alloc) + snd_pcm_lib_free_pages(substream); + return result; +} + static int snd_pcm_hw_free(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime; - int result = 0; + int result; if (PCM_RUNTIME_CHECK(substream)) return -ENXIO; @@ -805,11 +817,7 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream) snd_pcm_stream_unlock_irq(substream); if (atomic_read(&substream->mmap_count)) return -EBADFD; - snd_pcm_sync_stop(substream); - if (substream->ops->hw_free) - result = substream->ops->hw_free(substream); - if (substream->managed_buffer_alloc) - snd_pcm_lib_free_pages(substream); + result = do_hw_free(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_OPEN); pm_qos_remove_request(&substream->latency_pm_qos_req); return result; @@ -2466,9 +2474,8 @@ void snd_pcm_release_substream(struct snd_pcm_substream *substream) snd_pcm_drop(substream); if (substream->hw_opened) { - if (substream->ops->hw_free && - substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) - substream->ops->hw_free(substream); + if (substream->runtime->status->state != SNDRV_PCM_STATE_OPEN) + do_hw_free(substream); substream->ops->close(substream); substream->hw_opened = 0; } diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index a88c235b2ea3..2ddfe2226651 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -602,6 +602,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); + snd_midi_event_reset_decode(mdev->coder); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 6d9592f0ae1d..cc93157fa950 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -580,7 +580,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event, event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { - event->time.time = snd_seq_timer_get_cur_time(q->timer); + event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); @@ -1659,7 +1659,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; - status->time = snd_seq_timer_get_cur_time(tmr); + status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index caf68bf42f13..71a6ea62c3be 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -238,6 +238,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; + snd_seq_tick_time_t cur_tick; + snd_seq_real_time_t cur_time; if (q == NULL) return; @@ -254,17 +256,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... 
*/ + cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { - cell = snd_seq_prioq_cell_out(q->tickq, - &q->timer->tick.cur_tick); + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); } /* Process time queue... */ + cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); @@ -392,6 +395,7 @@ int snd_seq_queue_check_access(int queueid, int client) int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); + unsigned long flags; if (q == NULL) return -EINVAL; @@ -401,8 +405,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked) return -EPERM; } + spin_lock_irqsave(&q->owner_lock, flags); q->locked = locked ? 1 : 0; q->owner = client; + spin_unlock_irqrestore(&q->owner_lock, flags); queue_access_unlock(q); queuefree(q); @@ -539,15 +545,17 @@ void snd_seq_queue_client_termination(int client) unsigned long flags; int i; struct snd_seq_queue *q; + bool matched; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) continue; spin_lock_irqsave(&q->owner_lock, flags); - if (q->owner == client) + matched = (q->owner == client); + if (matched) q->klocked = 1; spin_unlock_irqrestore(&q->owner_lock, flags); - if (q->owner == client) { + if (matched) { if (q->timer->running) snd_seq_timer_stop(q->timer); snd_seq_timer_reset(q->timer); @@ -739,6 +747,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; + bool locked; + int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) @@ -750,9 +760,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, else bpm = 0; + spin_lock_irq(&q->owner_lock); + locked = q->locked; + owner = q->owner; + spin_unlock_irq(&q->owner_lock); + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); - snd_iprintf(buffer, "owned by client : %d\n", q->owner); - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free"); + snd_iprintf(buffer, "owned by client : %d\n", owner); + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index be59b59c9be4..1645e4142e30 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -428,14 +428,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr) } /* return current 'real' time. use timeofday() to get better granularity. 
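
Note: several of the ALSA sequencer hunks above (queue owner/locked state, the current tick) share one shape: take the spinlock, copy the shared fields into locals, drop the lock, then format or dispatch using the snapshot, so no reader sees a half-updated pair and no slow work happens under the lock. A pthreads rendering of that pattern:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t owner_lock = PTHREAD_MUTEX_INITIALIZER;
static int q_owner;
static int q_locked;

void print_queue_info(void)
{
	int owner, locked;

	/* snapshot under the lock... */
	pthread_mutex_lock(&owner_lock);
	owner = q_owner;
	locked = q_locked;
	pthread_mutex_unlock(&owner_lock);

	/* ...then do the slow formatting outside it */
	printf("owned by client : %d\n", owner);
	printf("lock status     : %s\n", locked ? "Locked" : "Free");
}
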
*/ -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime) { snd_seq_real_time_t cur_time; unsigned long flags; spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; - if (tmr->running) { + if (adjust_ktime && tmr->running) { struct timespec64 tm; ktime_get_ts64(&tm); @@ -452,7 +453,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) high PPQ values) */ snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr) { - return tmr->tick.cur_tick; + snd_seq_tick_time_t cur_tick; + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); + cur_tick = tmr->tick.cur_tick; + spin_unlock_irqrestore(&tmr->lock, flags); + return cur_tick; } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 66c3e344eae3..4bec57df8158 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -120,7 +120,8 @@ int snd_seq_timer_set_tempo_ppq(struct snd_seq_timer *tmr, int tempo, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr); +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime); snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr); extern int seq_default_timer_class; diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 626d87c1539b..77d7037d1476 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -81,6 +81,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); if (len > 0) diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 022a0db692e0..3a2839f44f5c 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -915,7 +915,7 @@ static void print_formats(struct snd_dummy *dummy, { int i; - for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) { + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { if (dummy->pcm_hw.formats & (1ULL << i)) snd_iprintf(buffer, " %s", snd_pcm_format_name(i)); } diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c index cfab60d88c92..09ff209df4a3 100644 --- a/sound/hda/ext/hdac_ext_controller.c +++ b/sound/hda/ext/hdac_ext_controller.c @@ -254,6 +254,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_bus_link_power_down_all); int snd_hdac_ext_bus_link_get(struct hdac_bus *bus, struct hdac_ext_link *link) { + unsigned long codec_mask; int ret = 0; mutex_lock(&bus->lock); @@ -280,9 +281,11 @@ int snd_hdac_ext_bus_link_get(struct hdac_bus *bus, * HDA spec section 4.3 - Codec Discovery */ udelay(521); - bus->codec_mask = snd_hdac_chip_readw(bus, STATESTS); - dev_dbg(bus->dev, "codec_mask = 0x%lx\n", bus->codec_mask); - snd_hdac_chip_writew(bus, STATESTS, bus->codec_mask); + codec_mask = snd_hdac_chip_readw(bus, STATESTS); + dev_dbg(bus->dev, "codec_mask = 0x%lx\n", codec_mask); + snd_hdac_chip_writew(bus, STATESTS, 
codec_mask); + if (!bus->codec_mask) + bus->codec_mask = codec_mask; } mutex_unlock(&bus->lock); diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index 886cb7811bd6..2efee794cac6 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -250,7 +250,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) { if (spk_alloc & (1 << i)) - j += snprintf(buf + j, buflen - j, " %s", + j += scnprintf(buf + j, buflen - j, " %s", cea_speaker_allocation_names[i]); } buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a2fb19129219..6cb72336433a 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4019,7 +4019,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++) if (pcm & (AC_SUPPCM_BITS_8 << i)) - j += snprintf(buf + j, buflen - j, " %d", bits[i]); + j += scnprintf(buf + j, buflen - j, " %d", bits[i]); buf[j] = '\0'; /* necessary when j == 0 */ } diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index d081fb2880a0..82cf1da2ff12 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -360,7 +360,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++) if (pcm & (1 << i)) - j += snprintf(buf + j, buflen - j, " %d", + j += scnprintf(buf + j, buflen - j, " %d", alsa_rates[i]); buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 8ef223aa1e37..9d3b28bcba9b 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2188,6 +2188,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { /* https://bugzilla.redhat.com/show_bug.cgi?id=1581607 */ SND_PCI_QUIRK(0x1558, 0x3501, "Clevo W35xSS_370SS", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ + SND_PCI_QUIRK(0x1558, 0x6504, "Clevo W65_67SB", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1028, 0x0497, "Dell Precision T3600", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ /* Note the P55A-UD3 and Z87-D3HP share the subsys id for the HDA dev */ @@ -2447,6 +2449,8 @@ static const struct pci_device_id azx_ids[] = { /* Jasperlake */ { PCI_DEVICE(0x8086, 0x38c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0x4dc8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Tigerlake */ { PCI_DEVICE(0x8086, 0xa0c8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index fcc34417cbce..6dbe99131bc4 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -222,7 +222,7 @@ static ssize_t init_verbs_show(struct device *dev, int i, len = 0; mutex_lock(&codec->user_mutex); snd_array_for_each(&codec->init_verbs, i, v) { - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "0x%02x 0x%03x 0x%04x\n", v->nid, v->verb, v->param); } @@ -272,7 +272,7 @@ static ssize_t hints_show(struct device *dev, int i, len = 0; mutex_lock(&codec->user_mutex); snd_array_for_each(&codec->hints, i, hint) { - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s = %s\n", hint->key, hint->val); } mutex_unlock(&codec->user_mutex); diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c index 
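
Note: the snprintf() to scnprintf() conversions above close a buffer overrun: snprintf() returns the length the output would have had, so accumulating "j += snprintf(buf + j, buflen - j, ...)" can push j past buflen, making the next size argument negative and, after conversion to size_t, huge. scnprintf() returns the bytes actually stored. A userspace model of the clamped variant:

#include <stdarg.h>
#include <stdio.h>

static size_t scnprintf_model(char *buf, size_t size, const char *fmt, ...)
{
	va_list ap;
	int n;

	if (!size)
		return 0;
	va_start(ap, fmt);
	n = vsnprintf(buf, size, fmt, ap);
	va_end(ap);
	if (n < 0)
		return 0;
	/* clamp to what actually fits (excluding the NUL) */
	return (size_t)n < size ? (size_t)n : size - 1;
}

/* with this, j += scnprintf_model(buf + j, buflen - j, ...) can never
 * advance j past buflen - 1 */
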
8350954b7986..e5191584638a 100644 --- a/sound/pci/hda/hda_tegra.c +++ b/sound/pci/hda/hda_tegra.c @@ -398,6 +398,7 @@ static int hda_tegra_create(struct snd_card *card, return err; chip->bus.needs_damn_long_delay = 1; + chip->bus.core.aligned_mmio = 1; err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 90aa0f400a57..1e20e85e9b46 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -922,6 +922,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 630b1f5c276d..4f195c7d966a 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1547,6 +1547,34 @@ static bool update_eld(struct hda_codec *codec, return eld_changed; } +static struct snd_jack *pin_idx_to_pcm_jack(struct hda_codec *codec, + struct hdmi_spec_per_pin *per_pin) +{ + struct hdmi_spec *spec = codec->spec; + struct snd_jack *jack = NULL; + struct hda_jack_tbl *jack_tbl; + + /* if !dyn_pcm_assign, get jack from hda_jack_tbl + * in !dyn_pcm_assign case, spec->pcm_rec[].jack is not + * NULL even after snd_hda_jack_tbl_clear() is called to + * free snd_jack. This may cause access invalid memory + * when calling snd_jack_report + */ + if (per_pin->pcm_idx >= 0 && spec->dyn_pcm_assign) { + jack = spec->pcm_rec[per_pin->pcm_idx].jack; + } else if (!spec->dyn_pcm_assign) { + /* + * jack tbl doesn't support DP MST + * DP MST will use dyn_pcm_assign, + * so DP MST will never come here + */ + jack_tbl = snd_hda_jack_tbl_get_mst(codec, per_pin->pin_nid, + per_pin->dev_id); + if (jack_tbl) + jack = jack_tbl->jack; + } + return jack; +} /* update ELD and jack state via HD-audio verbs */ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, int repoll) @@ -1568,6 +1596,7 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, int present; bool ret; bool do_repoll = false; + struct snd_jack *pcm_jack = NULL; present = snd_hda_jack_pin_sense(codec, pin_nid, dev_id); @@ -1595,10 +1624,19 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, do_repoll = true; } - if (do_repoll) + if (do_repoll) { schedule_delayed_work(&per_pin->work, msecs_to_jiffies(300)); - else + } else { + /* + * pcm_idx >=0 before update_eld() means it is in monitor + * disconnected event. Jack must be fetched before + * update_eld(). + */ + pcm_jack = pin_idx_to_pcm_jack(codec, per_pin); update_eld(codec, per_pin, eld); + if (!pcm_jack) + pcm_jack = pin_idx_to_pcm_jack(codec, per_pin); + } ret = !repoll || !eld->monitor_present || eld->eld_valid; @@ -1607,38 +1645,32 @@ static bool hdmi_present_sense_via_verbs(struct hdmi_spec_per_pin *per_pin, jack->block_report = !ret; jack->pin_sense = (eld->monitor_present && eld->eld_valid) ? 
AC_PINSENSE_PRESENCE : 0; - } - mutex_unlock(&per_pin->lock); - return ret; -} -static struct snd_jack *pin_idx_to_jack(struct hda_codec *codec, - struct hdmi_spec_per_pin *per_pin) -{ - struct hdmi_spec *spec = codec->spec; - struct snd_jack *jack = NULL; - struct hda_jack_tbl *jack_tbl; + if (spec->dyn_pcm_assign && pcm_jack && !do_repoll) { + int state = 0; + + if (jack->pin_sense & AC_PINSENSE_PRESENCE) + state = SND_JACK_AVOUT; + snd_jack_report(pcm_jack, state); + } - /* if !dyn_pcm_assign, get jack from hda_jack_tbl - * in !dyn_pcm_assign case, spec->pcm_rec[].jack is not - * NULL even after snd_hda_jack_tbl_clear() is called to - * free snd_jack. This may cause access invalid memory - * when calling snd_jack_report - */ - if (per_pin->pcm_idx >= 0 && spec->dyn_pcm_assign) - jack = spec->pcm_rec[per_pin->pcm_idx].jack; - else if (!spec->dyn_pcm_assign) { /* - * jack tbl doesn't support DP MST - * DP MST will use dyn_pcm_assign, - * so DP MST will never come here + * snd_hda_jack_pin_sense() call at the beginning of this + * function, updates jack->pins_sense and clears + * jack->jack_dirty, therefore snd_hda_jack_report_sync() will + * not override the jack->pin_sense. + * + * snd_hda_jack_report_sync() is superfluous for dyn_pcm_assign + * case. The jack->pin_sense update was already performed, and + * hda_jack->jack is NULL for dyn_pcm_assign. + * + * Don't call snd_hda_jack_report_sync() for + * dyn_pcm_assign. */ - jack_tbl = snd_hda_jack_tbl_get_mst(codec, per_pin->pin_nid, - per_pin->dev_id); - if (jack_tbl) - jack = jack_tbl->jack; + ret = ret && !spec->dyn_pcm_assign; } - return jack; + mutex_unlock(&per_pin->lock); + return ret; } /* update ELD and jack state via audio component */ @@ -1674,10 +1706,10 @@ static void sync_eld_via_acomp(struct hda_codec *codec, /* pcm_idx >=0 before update_eld() means it is in monitor * disconnected event. Jack must be fetched before update_eld() */ - jack = pin_idx_to_jack(codec, per_pin); + jack = pin_idx_to_pcm_jack(codec, per_pin); changed = update_eld(codec, per_pin, eld); if (jack == NULL) - jack = pin_idx_to_jack(codec, per_pin); + jack = pin_idx_to_pcm_jack(codec, per_pin); if (changed && jack) snd_jack_report(jack, (eld->monitor_present && eld->eld_valid) ? 
@@ -2830,9 +2862,12 @@ static int alloc_intel_hdmi(struct hda_codec *codec) /* parse and post-process for Intel codecs */ static int parse_intel_hdmi(struct hda_codec *codec) { - int err; + int err, retries = 3; + + do { + err = hdmi_parse_codec(codec); + } while (err < 0 && retries--); - err = hdmi_parse_codec(codec); if (err < 0) { generic_spec_free(codec); return err; @@ -4250,6 +4285,7 @@ HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi), HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f2ea3528bfb1..f44d8e258933 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -949,7 +949,7 @@ struct alc_codec_rename_pci_table { const char *name; }; -static struct alc_codec_rename_table rename_tbl[] = { +static const struct alc_codec_rename_table rename_tbl[] = { { 0x10ec0221, 0xf00f, 0x1003, "ALC231" }, { 0x10ec0269, 0xfff0, 0x3010, "ALC277" }, { 0x10ec0269, 0xf0f0, 0x2010, "ALC259" }, @@ -970,7 +970,7 @@ static struct alc_codec_rename_table rename_tbl[] = { { } /* terminator */ }; -static struct alc_codec_rename_pci_table rename_pci_tbl[] = { +static const struct alc_codec_rename_pci_table rename_pci_tbl[] = { { 0x10ec0280, 0x1028, 0, "ALC3220" }, { 0x10ec0282, 0x1028, 0, "ALC3221" }, { 0x10ec0283, 0x1028, 0, "ALC3223" }, @@ -2447,6 +2447,10 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), @@ -2996,7 +3000,7 @@ static void alc269_shutup(struct hda_codec *codec) alc_shutup_pins(codec); } -static struct coef_fw alc282_coefs[] = { +static const struct coef_fw alc282_coefs[] = { WRITE_COEF(0x03, 0x0002), /* Power Down Control */ UPDATE_COEF(0x05, 0xff3f, 0x0700), /* FIFO and filter clock */ WRITE_COEF(0x07, 0x0200), /* DMIC control */ @@ -3108,7 +3112,7 @@ static void alc282_shutup(struct hda_codec *codec) alc_write_coef_idx(codec, 0x78, coef78); } -static struct coef_fw alc283_coefs[] = { +static const struct coef_fw alc283_coefs[] = { WRITE_COEF(0x03, 0x0002), /* Power Down Control */ UPDATE_COEF(0x05, 0xff3f, 0x0700), /* FIFO and filter clock */ WRITE_COEF(0x07, 0x0200), /* DMIC control */ @@ -4184,7 +4188,7 @@ static void alc269_fixup_hp_line1_mic1_led(struct hda_codec *codec, } } -static struct coef_fw alc225_pre_hsmode[] = { +static const struct coef_fw alc225_pre_hsmode[] = { UPDATE_COEF(0x4a, 
1<<8, 0), UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), UPDATE_COEF(0x63, 3<<14, 3<<14), @@ -4197,7 +4201,7 @@ static struct coef_fw alc225_pre_hsmode[] = { static void alc_headset_mode_unplugged(struct hda_codec *codec) { - static struct coef_fw coef0255[] = { + static const struct coef_fw coef0255[] = { WRITE_COEF(0x1b, 0x0c0b), /* LDO and MISC control */ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */ UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/ @@ -4205,7 +4209,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) WRITE_COEFEX(0x57, 0x03, 0x8aa6), /* Direct Drive HP Amp control */ {} }; - static struct coef_fw coef0256[] = { + static const struct coef_fw coef0256[] = { WRITE_COEF(0x1b, 0x0c4b), /* LDO and MISC control */ WRITE_COEF(0x45, 0xd089), /* UAJ function set to menual mode */ WRITE_COEF(0x06, 0x6104), /* Set MIC2 Vref gate with HP */ @@ -4213,7 +4217,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) UPDATE_COEFEX(0x57, 0x05, 1<<14, 0), /* Direct Drive HP Amp control(Set to verb control)*/ {} }; - static struct coef_fw coef0233[] = { + static const struct coef_fw coef0233[] = { WRITE_COEF(0x1b, 0x0c0b), WRITE_COEF(0x45, 0xc429), UPDATE_COEF(0x35, 0x4000, 0), @@ -4223,7 +4227,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) WRITE_COEF(0x32, 0x42a3), {} }; - static struct coef_fw coef0288[] = { + static const struct coef_fw coef0288[] = { UPDATE_COEF(0x4f, 0xfcc0, 0xc400), UPDATE_COEF(0x50, 0x2000, 0x2000), UPDATE_COEF(0x56, 0x0006, 0x0006), @@ -4231,18 +4235,18 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) UPDATE_COEF(0x67, 0x2000, 0), {} }; - static struct coef_fw coef0298[] = { + static const struct coef_fw coef0298[] = { UPDATE_COEF(0x19, 0x1300, 0x0300), {} }; - static struct coef_fw coef0292[] = { + static const struct coef_fw coef0292[] = { WRITE_COEF(0x76, 0x000e), WRITE_COEF(0x6c, 0x2400), WRITE_COEF(0x18, 0x7308), WRITE_COEF(0x6b, 0xc429), {} }; - static struct coef_fw coef0293[] = { + static const struct coef_fw coef0293[] = { UPDATE_COEF(0x10, 7<<8, 6<<8), /* SET Line1 JD to 0 */ UPDATE_COEFEX(0x57, 0x05, 1<<15|1<<13, 0x0), /* SET charge pump by verb */ UPDATE_COEFEX(0x57, 0x03, 1<<10, 1<<10), /* SET EN_OSW to 1 */ @@ -4251,16 +4255,16 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) UPDATE_COEF(0x4a, 0x000f, 0x000e), /* Combo Jack auto detect */ {} }; - static struct coef_fw coef0668[] = { + static const struct coef_fw coef0668[] = { WRITE_COEF(0x15, 0x0d40), WRITE_COEF(0xb7, 0x802b), {} }; - static struct coef_fw coef0225[] = { + static const struct coef_fw coef0225[] = { UPDATE_COEF(0x63, 3<<14, 0), {} }; - static struct coef_fw coef0274[] = { + static const struct coef_fw coef0274[] = { UPDATE_COEF(0x4a, 0x0100, 0), UPDATE_COEFEX(0x57, 0x05, 0x4000, 0), UPDATE_COEF(0x6b, 0xf000, 0x5000), @@ -4325,25 +4329,25 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec) static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, hda_nid_t mic_pin) { - static struct coef_fw coef0255[] = { + static const struct coef_fw coef0255[] = { WRITE_COEFEX(0x57, 0x03, 0x8aa6), WRITE_COEF(0x06, 0x6100), /* Set MIC2 Vref gate to normal */ {} }; - static struct coef_fw coef0256[] = { + static const struct coef_fw coef0256[] = { UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14), /* Direct Drive HP Amp control(Set to verb control)*/ WRITE_COEFEX(0x57, 0x03, 0x09a3), WRITE_COEF(0x06, 0x6100), /* Set MIC2 Vref gate to normal */ 
{} }; - static struct coef_fw coef0233[] = { + static const struct coef_fw coef0233[] = { UPDATE_COEF(0x35, 0, 1<<14), WRITE_COEF(0x06, 0x2100), WRITE_COEF(0x1a, 0x0021), WRITE_COEF(0x26, 0x008c), {} }; - static struct coef_fw coef0288[] = { + static const struct coef_fw coef0288[] = { UPDATE_COEF(0x4f, 0x00c0, 0), UPDATE_COEF(0x50, 0x2000, 0), UPDATE_COEF(0x56, 0x0006, 0), @@ -4352,30 +4356,30 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, UPDATE_COEF(0x67, 0x2000, 0x2000), {} }; - static struct coef_fw coef0292[] = { + static const struct coef_fw coef0292[] = { WRITE_COEF(0x19, 0xa208), WRITE_COEF(0x2e, 0xacf0), {} }; - static struct coef_fw coef0293[] = { + static const struct coef_fw coef0293[] = { UPDATE_COEFEX(0x57, 0x05, 0, 1<<15|1<<13), /* SET charge pump by verb */ UPDATE_COEFEX(0x57, 0x03, 1<<10, 0), /* SET EN_OSW to 0 */ UPDATE_COEF(0x1a, 1<<3, 0), /* Combo JD gating without LINE1-VREFO */ {} }; - static struct coef_fw coef0688[] = { + static const struct coef_fw coef0688[] = { WRITE_COEF(0xb7, 0x802b), WRITE_COEF(0xb5, 0x1040), UPDATE_COEF(0xc3, 0, 1<<12), {} }; - static struct coef_fw coef0225[] = { + static const struct coef_fw coef0225[] = { UPDATE_COEFEX(0x57, 0x05, 1<<14, 1<<14), UPDATE_COEF(0x4a, 3<<4, 2<<4), UPDATE_COEF(0x63, 3<<14, 0), {} }; - static struct coef_fw coef0274[] = { + static const struct coef_fw coef0274[] = { UPDATE_COEFEX(0x57, 0x05, 0x4000, 0x4000), UPDATE_COEF(0x4a, 0x0010, 0), UPDATE_COEF(0x6b, 0xf000, 0), @@ -4461,7 +4465,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin, static void alc_headset_mode_default(struct hda_codec *codec) { - static struct coef_fw coef0225[] = { + static const struct coef_fw coef0225[] = { UPDATE_COEF(0x45, 0x3f<<10, 0x30<<10), UPDATE_COEF(0x45, 0x3f<<10, 0x31<<10), UPDATE_COEF(0x49, 3<<8, 0<<8), @@ -4470,14 +4474,14 @@ static void alc_headset_mode_default(struct hda_codec *codec) UPDATE_COEF(0x67, 0xf000, 0x3000), {} }; - static struct coef_fw coef0255[] = { + static const struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xc089), WRITE_COEF(0x45, 0xc489), WRITE_COEFEX(0x57, 0x03, 0x8ea6), WRITE_COEF(0x49, 0x0049), {} }; - static struct coef_fw coef0256[] = { + static const struct coef_fw coef0256[] = { WRITE_COEF(0x45, 0xc489), WRITE_COEFEX(0x57, 0x03, 0x0da3), WRITE_COEF(0x49, 0x0049), @@ -4485,12 +4489,12 @@ static void alc_headset_mode_default(struct hda_codec *codec) WRITE_COEF(0x06, 0x6100), {} }; - static struct coef_fw coef0233[] = { + static const struct coef_fw coef0233[] = { WRITE_COEF(0x06, 0x2100), WRITE_COEF(0x32, 0x4ea3), {} }; - static struct coef_fw coef0288[] = { + static const struct coef_fw coef0288[] = { UPDATE_COEF(0x4f, 0xfcc0, 0xc400), /* Set to TRS type */ UPDATE_COEF(0x50, 0x2000, 0x2000), UPDATE_COEF(0x56, 0x0006, 0x0006), @@ -4498,26 +4502,26 @@ static void alc_headset_mode_default(struct hda_codec *codec) UPDATE_COEF(0x67, 0x2000, 0), {} }; - static struct coef_fw coef0292[] = { + static const struct coef_fw coef0292[] = { WRITE_COEF(0x76, 0x000e), WRITE_COEF(0x6c, 0x2400), WRITE_COEF(0x6b, 0xc429), WRITE_COEF(0x18, 0x7308), {} }; - static struct coef_fw coef0293[] = { + static const struct coef_fw coef0293[] = { UPDATE_COEF(0x4a, 0x000f, 0x000e), /* Combo Jack auto detect */ WRITE_COEF(0x45, 0xC429), /* Set to TRS type */ UPDATE_COEF(0x1a, 1<<3, 0), /* Combo JD gating without LINE1-VREFO */ {} }; - static struct coef_fw coef0688[] = { + static const struct coef_fw coef0688[] = { WRITE_COEF(0x11, 0x0041), WRITE_COEF(0x15, 
0x0d40), WRITE_COEF(0xb7, 0x802b), {} }; - static struct coef_fw coef0274[] = { + static const struct coef_fw coef0274[] = { WRITE_COEF(0x45, 0x4289), UPDATE_COEF(0x4a, 0x0010, 0x0010), UPDATE_COEF(0x6b, 0x0f00, 0), @@ -4580,53 +4584,53 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) { int val; - static struct coef_fw coef0255[] = { + static const struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */ WRITE_COEF(0x1b, 0x0c2b), WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; - static struct coef_fw coef0256[] = { + static const struct coef_fw coef0256[] = { WRITE_COEF(0x45, 0xd489), /* Set to CTIA type */ WRITE_COEF(0x1b, 0x0e6b), {} }; - static struct coef_fw coef0233[] = { + static const struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xd429), WRITE_COEF(0x1b, 0x0c2b), WRITE_COEF(0x32, 0x4ea3), {} }; - static struct coef_fw coef0288[] = { + static const struct coef_fw coef0288[] = { UPDATE_COEF(0x50, 0x2000, 0x2000), UPDATE_COEF(0x56, 0x0006, 0x0006), UPDATE_COEF(0x66, 0x0008, 0), UPDATE_COEF(0x67, 0x2000, 0), {} }; - static struct coef_fw coef0292[] = { + static const struct coef_fw coef0292[] = { WRITE_COEF(0x6b, 0xd429), WRITE_COEF(0x76, 0x0008), WRITE_COEF(0x18, 0x7388), {} }; - static struct coef_fw coef0293[] = { + static const struct coef_fw coef0293[] = { WRITE_COEF(0x45, 0xd429), /* Set to ctia type */ UPDATE_COEF(0x10, 7<<8, 7<<8), /* SET Line1 JD to 1 */ {} }; - static struct coef_fw coef0688[] = { + static const struct coef_fw coef0688[] = { WRITE_COEF(0x11, 0x0001), WRITE_COEF(0x15, 0x0d60), WRITE_COEF(0xc3, 0x0000), {} }; - static struct coef_fw coef0225_1[] = { + static const struct coef_fw coef0225_1[] = { UPDATE_COEF(0x45, 0x3f<<10, 0x35<<10), UPDATE_COEF(0x63, 3<<14, 2<<14), {} }; - static struct coef_fw coef0225_2[] = { + static const struct coef_fw coef0225_2[] = { UPDATE_COEF(0x45, 0x3f<<10, 0x35<<10), UPDATE_COEF(0x63, 3<<14, 1<<14), {} @@ -4698,48 +4702,48 @@ static void alc_headset_mode_ctia(struct hda_codec *codec) /* Nokia type */ static void alc_headset_mode_omtp(struct hda_codec *codec) { - static struct coef_fw coef0255[] = { + static const struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */ WRITE_COEF(0x1b, 0x0c2b), WRITE_COEFEX(0x57, 0x03, 0x8ea6), {} }; - static struct coef_fw coef0256[] = { + static const struct coef_fw coef0256[] = { WRITE_COEF(0x45, 0xe489), /* Set to OMTP Type */ WRITE_COEF(0x1b, 0x0e6b), {} }; - static struct coef_fw coef0233[] = { + static const struct coef_fw coef0233[] = { WRITE_COEF(0x45, 0xe429), WRITE_COEF(0x1b, 0x0c2b), WRITE_COEF(0x32, 0x4ea3), {} }; - static struct coef_fw coef0288[] = { + static const struct coef_fw coef0288[] = { UPDATE_COEF(0x50, 0x2000, 0x2000), UPDATE_COEF(0x56, 0x0006, 0x0006), UPDATE_COEF(0x66, 0x0008, 0), UPDATE_COEF(0x67, 0x2000, 0), {} }; - static struct coef_fw coef0292[] = { + static const struct coef_fw coef0292[] = { WRITE_COEF(0x6b, 0xe429), WRITE_COEF(0x76, 0x0008), WRITE_COEF(0x18, 0x7388), {} }; - static struct coef_fw coef0293[] = { + static const struct coef_fw coef0293[] = { WRITE_COEF(0x45, 0xe429), /* Set to omtp type */ UPDATE_COEF(0x10, 7<<8, 7<<8), /* SET Line1 JD to 1 */ {} }; - static struct coef_fw coef0688[] = { + static const struct coef_fw coef0688[] = { WRITE_COEF(0x11, 0x0001), WRITE_COEF(0x15, 0x0d50), WRITE_COEF(0xc3, 0x0000), {} }; - static struct coef_fw coef0225[] = { + static const struct coef_fw coef0225[] = { UPDATE_COEF(0x45, 0x3f<<10, 0x39<<10), UPDATE_COEF(0x63, 3<<14, 2<<14), {} @@ -4799,17 +4803,17 @@ static 
void alc_determine_headset_type(struct hda_codec *codec) int val; bool is_ctia = false; struct alc_spec *spec = codec->spec; - static struct coef_fw coef0255[] = { + static const struct coef_fw coef0255[] = { WRITE_COEF(0x45, 0xd089), /* combo jack auto switch control(Check type)*/ WRITE_COEF(0x49, 0x0149), /* combo jack auto switch control(Vref conteol) */ {} }; - static struct coef_fw coef0288[] = { + static const struct coef_fw coef0288[] = { UPDATE_COEF(0x4f, 0xfcc0, 0xd400), /* Check Type */ {} }; - static struct coef_fw coef0298[] = { + static const struct coef_fw coef0298[] = { UPDATE_COEF(0x50, 0x2000, 0x2000), UPDATE_COEF(0x56, 0x0006, 0x0006), UPDATE_COEF(0x66, 0x0008, 0), @@ -4817,19 +4821,19 @@ static void alc_determine_headset_type(struct hda_codec *codec) UPDATE_COEF(0x19, 0x1300, 0x1300), {} }; - static struct coef_fw coef0293[] = { + static const struct coef_fw coef0293[] = { UPDATE_COEF(0x4a, 0x000f, 0x0008), /* Combo Jack auto detect */ WRITE_COEF(0x45, 0xD429), /* Set to ctia type */ {} }; - static struct coef_fw coef0688[] = { + static const struct coef_fw coef0688[] = { WRITE_COEF(0x11, 0x0001), WRITE_COEF(0xb7, 0x802b), WRITE_COEF(0x15, 0x0d60), WRITE_COEF(0xc3, 0x0c00), {} }; - static struct coef_fw coef0274[] = { + static const struct coef_fw coef0274[] = { UPDATE_COEF(0x4a, 0x0010, 0), UPDATE_COEF(0x4a, 0x8000, 0), WRITE_COEF(0x45, 0xd289), @@ -5116,7 +5120,7 @@ static void alc_fixup_headset_mode_no_hp_mic(struct hda_codec *codec, static void alc255_set_default_jack_type(struct hda_codec *codec) { /* Set to iphone type */ - static struct coef_fw alc255fw[] = { + static const struct coef_fw alc255fw[] = { WRITE_COEF(0x1b, 0x880b), WRITE_COEF(0x45, 0xd089), WRITE_COEF(0x1b, 0x080b), @@ -5124,7 +5128,7 @@ static void alc255_set_default_jack_type(struct hda_codec *codec) WRITE_COEF(0x1b, 0x0c0b), {} }; - static struct coef_fw alc256fw[] = { + static const struct coef_fw alc256fw[] = { WRITE_COEF(0x1b, 0x884b), WRITE_COEF(0x45, 0xd089), WRITE_COEF(0x1b, 0x084b), @@ -5701,8 +5705,11 @@ static void alc_fixup_headset_jack(struct hda_codec *codec, break; case HDA_FIXUP_ACT_INIT: switch (codec->core.vendor_id) { + case 0x10ec0215: case 0x10ec0225: + case 0x10ec0285: case 0x10ec0295: + case 0x10ec0289: case 0x10ec0299: alc_write_coef_idx(codec, 0x48, 0xd011); alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045); @@ -5848,6 +5855,7 @@ enum { ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, ALC288_FIXUP_DISABLE_AAMIX, + ALC292_FIXUP_DELL_E7X_AAMIX, ALC292_FIXUP_DELL_E7X, ALC292_FIXUP_DISABLE_AAMIX, ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK, @@ -5913,7 +5921,8 @@ enum { ALC289_FIXUP_DUAL_SPK, ALC294_FIXUP_SPK2_TO_DAC1, ALC294_FIXUP_ASUS_DUAL_SPK, - + ALC285_FIXUP_THINKPAD_HEADSET_JACK, + ALC294_FIXUP_ASUS_HPE, }; static const struct hda_fixup alc269_fixups[] = { @@ -6543,12 +6552,19 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC293_FIXUP_DELL1_MIC_NO_PRESENCE }, - [ALC292_FIXUP_DELL_E7X] = { + [ALC292_FIXUP_DELL_E7X_AAMIX] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_dell_xps13, .chained = true, .chain_id = ALC292_FIXUP_DISABLE_AAMIX }, + [ALC292_FIXUP_DELL_E7X] = { + .type = HDA_FIXUP_FUNC, + .v.func = snd_hda_gen_fixup_micmute_led, + /* micmute fixup must be applied at last */ + .chained_before = true, + .chain_id = ALC292_FIXUP_DELL_E7X_AAMIX, + }, [ALC298_FIXUP_ALIENWARE_MIC_NO_PRESENCE] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -6670,6 +6686,8 @@ static const struct hda_fixup alc269_fixups[] = { 
[ALC285_FIXUP_SPEAKER2_TO_DAC1] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_speaker2_to_dac1, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI }, [ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = { .type = HDA_FIXUP_PINS, @@ -7026,7 +7044,23 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC294_FIXUP_SPK2_TO_DAC1 }, - + [ALC285_FIXUP_THINKPAD_HEADSET_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_headset_jack, + .chained = true, + .chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1 + }, + [ALC294_FIXUP_ASUS_HPE] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* Set EAPD high */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x0f }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x7774 }, + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7101,6 +7135,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), SND_PCI_QUIRK(0x1028, 0x097e, "Dell Precision", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x098d, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x09bf, "Dell Precision", ALC233_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -7190,6 +7226,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), @@ -7260,8 +7297,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x224c, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x224d, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x225d, "Thinkpad T480", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), - SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1), - SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_SPEAKER2_TO_DAC1), + SND_PCI_QUIRK(0x17aa, 0x2292, "Thinkpad X1 Yoga 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), + SND_PCI_QUIRK(0x17aa, 0x2293, "Thinkpad X1 Carbon 7th", ALC285_FIXUP_THINKPAD_HEADSET_JACK), SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), @@ -8014,6 +8051,8 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; break; case 0x10ec0225: + codec->power_save_node = 1; + /* fall through */ case 0x10ec0295: case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; @@ -8476,7 +8515,30 @@ static void alc662_fixup_aspire_ethos_hp(struct hda_codec *codec, } } -static struct coef_fw alc668_coefs[] = { +static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec, + const struct 
hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + static const struct hda_pintbl pincfgs[] = { + { 0x19, 0x02a11040 }, /* use as headset mic, with its own jack detect */ + { 0x1b, 0x0181304f }, + { } + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gen.mixer_nid = 0; + spec->parse_flags |= HDA_PINCFG_HEADSET_MIC; + snd_hda_apply_pincfgs(codec, pincfgs); + break; + case HDA_FIXUP_ACT_INIT: + alc_write_coef_idx(codec, 0x19, 0xa054); + break; + } +} + +static const struct coef_fw alc668_coefs[] = { WRITE_COEF(0x01, 0xbebe), WRITE_COEF(0x02, 0xaaaa), WRITE_COEF(0x03, 0x0), WRITE_COEF(0x04, 0x0180), WRITE_COEF(0x06, 0x0), WRITE_COEF(0x07, 0x0f80), WRITE_COEF(0x08, 0x0031), WRITE_COEF(0x0a, 0x0060), WRITE_COEF(0x0b, 0x0), @@ -8549,6 +8611,9 @@ enum { ALC662_FIXUP_LENOVO_MULTI_CODECS, ALC669_FIXUP_ACER_ASPIRE_ETHOS, ALC669_FIXUP_ACER_ASPIRE_ETHOS_HEADSET, + ALC671_FIXUP_HP_HEADSET_MIC2, + ALC662_FIXUP_ACER_X2660G_HEADSET_MODE, + ALC662_FIXUP_ACER_NITRO_HEADSET_MODE, }; static const struct hda_fixup alc662_fixups[] = { @@ -8890,6 +8955,29 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC669_FIXUP_ACER_ASPIRE_ETHOS_HEADSET }, + [ALC671_FIXUP_HP_HEADSET_MIC2] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc671_fixup_hp_headset_mic2, + }, + [ALC662_FIXUP_ACER_X2660G_HEADSET_MODE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x02a1113c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC662_FIXUP_USI_FUNC + }, + [ALC662_FIXUP_ACER_NITRO_HEADSET_MODE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x01a11140 }, /* use as headset mic, without its own jack detect */ + { 0x1b, 0x0221144f }, + { } + }, + .chained = true, + .chain_id = ALC662_FIXUP_USI_FUNC + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -8901,6 +8989,8 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE), + SND_PCI_QUIRK(0x1025, 0x123c, "Acer Nitro N50-600", ALC662_FIXUP_ACER_NITRO_HEADSET_MODE), + SND_PCI_QUIRK(0x1025, 0x124e, "Acer 2660G", ALC662_FIXUP_ACER_X2660G_HEADSET_MODE), SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13), @@ -9072,6 +9162,23 @@ static const struct snd_hda_pin_quirk alc662_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x14, 0x90170110}, {0x15, 0x0321101f}), + SND_HDA_PIN_QUIRK(0x10ec0671, 0x103c, "HP cPC", ALC671_FIXUP_HP_HEADSET_MIC2, + {0x14, 0x01014010}, + {0x17, 0x90170150}, + {0x19, 0x02a11060}, + {0x1b, 0x01813030}, + {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0671, 0x103c, "HP cPC", ALC671_FIXUP_HP_HEADSET_MIC2, + {0x14, 0x01014010}, + {0x18, 0x01a19040}, + {0x1b, 0x01813030}, + {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0671, 0x103c, "HP cPC", ALC671_FIXUP_HP_HEADSET_MIC2, + {0x14, 0x01014020}, + {0x17, 0x90170110}, + {0x18, 0x01a19050}, + {0x1b, 0x01813040}, + {0x21, 0x02211030}), {} }; diff --git a/sound/sh/aica.c b/sound/sh/aica.c index bf1fb0d8a930..f69072d2564c 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -101,10 +101,10 @@ static void spu_memset(u32 toi, u32 what, int length) } /* spu_memload 
- write to SPU address space */ -static void spu_memload(u32 toi, void *from, int length) +static void spu_memload(u32 toi, const void *from, int length) { unsigned long flags; - u32 *froml = from; + const u32 *froml = from; u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi); int i; u32 val; diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index f9e36abc98ac..725992937e8f 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -175,7 +175,6 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; if (copy_from_user_toio(chip->data_buffer + pos, src, count)) return -EFAULT; @@ -195,7 +194,6 @@ static int snd_sh_dac_pcm_copy_kernel(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memcpy_toio(chip->data_buffer + pos, src, count); chip->buffer_end = chip->data_buffer + pos + count; @@ -214,7 +212,6 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memset_io(chip->data_buffer + pos, 0, count); chip->buffer_end = chip->data_buffer + pos + count; diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index f118c229ed82..71f2d42188c4 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -10,15 +10,17 @@ config SND_ATMEL_SOC if SND_ATMEL_SOC config SND_ATMEL_SOC_PDC - tristate + bool depends on HAS_DMA config SND_ATMEL_SOC_DMA - tristate + bool select SND_SOC_GENERIC_DMAENGINE_PCM config SND_ATMEL_SOC_SSC tristate + select SND_ATMEL_SOC_DMA + select SND_ATMEL_SOC_PDC config SND_ATMEL_SOC_SSC_PDC tristate "SoC PCM DAI support for AT91 SSC controller using PDC" diff --git a/sound/soc/atmel/Makefile b/sound/soc/atmel/Makefile index 1f6890ed3738..c7d2989791be 100644 --- a/sound/soc/atmel/Makefile +++ b/sound/soc/atmel/Makefile @@ -6,8 +6,14 @@ snd-soc-atmel_ssc_dai-objs := atmel_ssc_dai.o snd-soc-atmel-i2s-objs := atmel-i2s.o snd-soc-mchp-i2s-mcc-objs := mchp-i2s-mcc.o -obj-$(CONFIG_SND_ATMEL_SOC_PDC) += snd-soc-atmel-pcm-pdc.o -obj-$(CONFIG_SND_ATMEL_SOC_DMA) += snd-soc-atmel-pcm-dma.o +# pdc and dma need to both be built-in if any user of +# ssc is built-in. 
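The ifdef blocks that follow implement that comment. The Kconfig hunk above turns SND_ATMEL_SOC_PDC and SND_ATMEL_SOC_DMA from tristate into plain bools and has SND_ATMEL_SOC_SSC select both; the Makefile then keys both PCM objects off CONFIG_SND_ATMEL_SOC_SSC so they track its built-in/modular state. The underlying constraint is a link-time one: the SSC DAI chooses between the two PCM back-ends at probe time, so a built-in SSC user must be able to resolve both register helpers in vmlinux. A condensed sketch of that choice, assuming the helper declarations from sound/soc/atmel/atmel-pcm.h (the wrapper function name is illustrative, not from the tree):

#include <linux/device.h>
#include <linux/atmel-ssc.h>

#include "atmel-pcm.h"	/* atmel_pcm_{pdc,dma}_platform_register() */

/*
 * Condensed sketch of the back-end choice the SSC DAI makes at probe
 * time (wrapper name is illustrative).  Each callee lives in
 * atmel-pcm-pdc.o or atmel-pcm-dma.o; if either stays a module while
 * the SSC DAI is built in, the vmlinux link fails with unresolved
 * symbols.
 */
static int ssc_register_pcm_backend(struct device *dev)
{
	struct ssc_device *ssc = dev_get_drvdata(dev);

	if (ssc->pdata->use_dma)
		return atmel_pcm_dma_platform_register(dev);

	return atmel_pcm_pdc_platform_register(dev);
}

Keying the objects off CONFIG_SND_ATMEL_SOC_SSC in the ifdef blocks below makes the back-ends follow the SSC driver: built into vmlinux when it is built in, modular when it is modular.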
+ifdef CONFIG_SND_ATMEL_SOC_PDC +obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel-pcm-pdc.o +endif +ifdef CONFIG_SND_ATMEL_SOC_DMA +obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel-pcm-dma.o +endif obj-$(CONFIG_SND_ATMEL_SOC_SSC) += snd-soc-atmel_ssc_dai.o obj-$(CONFIG_SND_ATMEL_SOC_I2S) += snd-soc-atmel-i2s.o obj-$(CONFIG_SND_MCHP_SOC_I2S_MCC) += snd-soc-mchp-i2s-mcc.o diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 861210f6bf4f..4cbef9affffd 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1564,13 +1564,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) } pcm512x->sclk = devm_clk_get(dev, NULL); - if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err; + } if (!IS_ERR(pcm512x->sclk)) { ret = clk_prepare_enable(pcm512x->sclk); if (ret != 0) { dev_err(dev, "Failed to enable SCLK: %d\n", ret); - return ret; + goto err; } } diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index aa1f9637d895..e949b372cead 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1344,7 +1344,8 @@ static int sgtl5000_set_power_regs(struct snd_soc_component *component) * if vddio == vdda the source of charge pump should be * assigned manually to VDDIO */ - if (vddio == vdda) { + if (regulator_is_equal(sgtl5000->supplies[VDDA].consumer, + sgtl5000->supplies[VDDIO].consumer)) { lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD; lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO << SGTL5000_VDDC_MAN_ASSN_SHIFT; diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 2a9b610f6d43..d3d32b501aca 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1030,8 +1030,8 @@ static int wm_coeff_write_acked_control(struct wm_coeff_ctl *ctl, return -ETIMEDOUT; } -static int wm_coeff_write_control(struct wm_coeff_ctl *ctl, - const void *buf, size_t len) +static int wm_coeff_write_ctrl_raw(struct wm_coeff_ctl *ctl, + const void *buf, size_t len) { struct wm_adsp *dsp = ctl->dsp; void *scratch; @@ -1061,6 +1061,23 @@ static int wm_coeff_write_control(struct wm_coeff_ctl *ctl, return 0; } +static int wm_coeff_write_ctrl(struct wm_coeff_ctl *ctl, + const void *buf, size_t len) +{ + int ret = 0; + + if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) + ret = -EPERM; + else if (buf != ctl->cache) + memcpy(ctl->cache, buf, len); + + ctl->set = 1; + if (ctl->enabled && ctl->dsp->running) + ret = wm_coeff_write_ctrl_raw(ctl, buf, len); + + return ret; +} + static int wm_coeff_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *ucontrol) { @@ -1071,16 +1088,7 @@ static int wm_coeff_put(struct snd_kcontrol *kctl, int ret = 0; mutex_lock(&ctl->dsp->pwr_lock); - - if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) - ret = -EPERM; - else - memcpy(ctl->cache, p, ctl->len); - - ctl->set = 1; - if (ctl->enabled && ctl->dsp->running) - ret = wm_coeff_write_control(ctl, p, ctl->len); - + ret = wm_coeff_write_ctrl(ctl, p, ctl->len); mutex_unlock(&ctl->dsp->pwr_lock); return ret; @@ -1096,15 +1104,10 @@ static int wm_coeff_tlv_put(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - if (copy_from_user(ctl->cache, bytes, size)) { + if (copy_from_user(ctl->cache, bytes, size)) ret = -EFAULT; - } else { - ctl->set = 1; - if (ctl->enabled && ctl->dsp->running) - ret = wm_coeff_write_control(ctl, ctl->cache, size); - else if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) - ret = -EPERM; - } + else + ret = wm_coeff_write_ctrl(ctl, ctl->cache, 
size); mutex_unlock(&ctl->dsp->pwr_lock); @@ -1135,8 +1138,8 @@ static int wm_coeff_put_acked(struct snd_kcontrol *kctl, return ret; } -static int wm_coeff_read_control(struct wm_coeff_ctl *ctl, - void *buf, size_t len) +static int wm_coeff_read_ctrl_raw(struct wm_coeff_ctl *ctl, + void *buf, size_t len) { struct wm_adsp *dsp = ctl->dsp; void *scratch; @@ -1166,29 +1169,37 @@ static int wm_coeff_read_control(struct wm_coeff_ctl *ctl, return 0; } -static int wm_coeff_get(struct snd_kcontrol *kctl, - struct snd_ctl_elem_value *ucontrol) +static int wm_coeff_read_ctrl(struct wm_coeff_ctl *ctl, void *buf, size_t len) { - struct soc_bytes_ext *bytes_ext = - (struct soc_bytes_ext *)kctl->private_value; - struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); - char *p = ucontrol->value.bytes.data; int ret = 0; - mutex_lock(&ctl->dsp->pwr_lock); - if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) { if (ctl->enabled && ctl->dsp->running) - ret = wm_coeff_read_control(ctl, p, ctl->len); + return wm_coeff_read_ctrl_raw(ctl, buf, len); else - ret = -EPERM; + return -EPERM; } else { if (!ctl->flags && ctl->enabled && ctl->dsp->running) - ret = wm_coeff_read_control(ctl, ctl->cache, ctl->len); + ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len); - memcpy(p, ctl->cache, ctl->len); + if (buf != ctl->cache) + memcpy(buf, ctl->cache, len); } + return ret; +} + +static int wm_coeff_get(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *ucontrol) +{ + struct soc_bytes_ext *bytes_ext = + (struct soc_bytes_ext *)kctl->private_value; + struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext); + char *p = ucontrol->value.bytes.data; + int ret; + + mutex_lock(&ctl->dsp->pwr_lock); + ret = wm_coeff_read_ctrl(ctl, p, ctl->len); mutex_unlock(&ctl->dsp->pwr_lock); return ret; @@ -1204,15 +1215,7 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) { - if (ctl->enabled && ctl->dsp->running) - ret = wm_coeff_read_control(ctl, ctl->cache, size); - else - ret = -EPERM; - } else { - if (!ctl->flags && ctl->enabled && ctl->dsp->running) - ret = wm_coeff_read_control(ctl, ctl->cache, size); - } + ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size); if (!ret && copy_to_user(bytes, ctl->cache, size)) ret = -EFAULT; @@ -1340,7 +1343,7 @@ static int wm_coeff_init_control_caches(struct wm_adsp *dsp) * created so we don't need to do anything. 
*/ if (!ctl->flags || (ctl->flags & WMFW_CTL_FLAG_READABLE)) { - ret = wm_coeff_read_control(ctl, ctl->cache, ctl->len); + ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len); if (ret < 0) return ret; } @@ -1358,7 +1361,8 @@ static int wm_coeff_sync_controls(struct wm_adsp *dsp) if (!ctl->enabled) continue; if (ctl->set && !(ctl->flags & WMFW_CTL_FLAG_VOLATILE)) { - ret = wm_coeff_write_control(ctl, ctl->cache, ctl->len); + ret = wm_coeff_write_ctrl_raw(ctl, ctl->cache, + ctl->len); if (ret < 0) return ret; } @@ -2048,7 +2052,7 @@ int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, if (len > ctl->len) return -EINVAL; - ret = wm_coeff_write_control(ctl, buf, len); + ret = wm_coeff_write_ctrl(ctl, buf, len); kcontrol = snd_soc_card_get_kcontrol(dsp->component->card, ctl->name); snd_ctl_notify(dsp->component->card->snd_card, @@ -2070,7 +2074,7 @@ int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, if (len > ctl->len) return -EINVAL; - return wm_coeff_read_control(ctl, buf, len); + return wm_coeff_read_ctrl(ctl, buf, len); } EXPORT_SYMBOL_GPL(wm_adsp_read_ctl); diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index b517e4bc1b87..41b83ecaf008 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -1019,12 +1019,24 @@ static int fsl_sai_probe(struct platform_device *pdev) ret = devm_snd_soc_register_component(&pdev->dev, &fsl_component, &fsl_sai_dai, 1); if (ret) - return ret; + goto err_pm_disable; - if (sai->soc_data->use_imx_pcm) - return imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE); - else - return devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + if (sai->soc_data->use_imx_pcm) { + ret = imx_pcm_dma_init(pdev, IMX_SAI_DMABUF_SIZE); + if (ret) + goto err_pm_disable; + } else { + ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); + if (ret) + goto err_pm_disable; + } + + return ret; + +err_pm_disable: + pm_runtime_disable(&pdev->dev); + + return ret; } static int fsl_sai_remove(struct platform_device *pdev) diff --git a/sound/soc/intel/boards/bxt_da7219_max98357a.c b/sound/soc/intel/boards/bxt_da7219_max98357a.c index 5873abb46441..749b1c4f1cee 100644 --- a/sound/soc/intel/boards/bxt_da7219_max98357a.c +++ b/sound/soc/intel/boards/bxt_da7219_max98357a.c @@ -617,12 +617,15 @@ static int bxt_card_late_probe(struct snd_soc_card *card) snd_soc_dapm_add_routes(&card->dapm, broxton_map, ARRAY_SIZE(broxton_map)); - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -643,9 +646,6 @@ static int bxt_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/bxt_rt298.c b/sound/soc/intel/boards/bxt_rt298.c index eabf9d8468ae..becfc4fc1aff 100644 --- a/sound/soc/intel/boards/bxt_rt298.c +++ b/sound/soc/intel/boards/bxt_rt298.c @@ -529,12 +529,15 @@ static int bxt_card_late_probe(struct snd_soc_card *card) int err, i = 0; char jack_name[NAME_SIZE]; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, - head); - component = 
pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct bxt_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -555,9 +558,6 @@ static int bxt_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/cml_rt1011_rt5682.c b/sound/soc/intel/boards/cml_rt1011_rt5682.c index 5f1bf6d3800c..a54636f77c8e 100644 --- a/sound/soc/intel/boards/cml_rt1011_rt5682.c +++ b/sound/soc/intel/boards/cml_rt1011_rt5682.c @@ -241,12 +241,15 @@ static int sof_card_late_probe(struct snd_soc_card *card) struct hdmi_pcm *pcm; int ret, i = 0; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -265,8 +268,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/glk_rt5682_max98357a.c b/sound/soc/intel/boards/glk_rt5682_max98357a.c index b36264d1d1cd..94c6bdfab63b 100644 --- a/sound/soc/intel/boards/glk_rt5682_max98357a.c +++ b/sound/soc/intel/boards/glk_rt5682_max98357a.c @@ -544,15 +544,18 @@ static int glk_card_late_probe(struct snd_soc_card *card) struct snd_soc_component *component = NULL; char jack_name[NAME_SIZE]; struct glk_hdmi_pcm *pcm; - int err = 0; + int err; int i = 0; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct glk_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct glk_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -573,9 +576,6 @@ static int glk_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/intel/boards/skl_hda_dsp_common.c b/sound/soc/intel/boards/skl_hda_dsp_common.c index eb419e1ec42b..78ff5f24c40e 100644 --- a/sound/soc/intel/boards/skl_hda_dsp_common.c +++ b/sound/soc/intel/boards/skl_hda_dsp_common.c @@ -41,16 +41,19 @@ int skl_hda_hdmi_add_pcm(struct snd_soc_card *card, int device) return 0; } -SND_SOC_DAILINK_DEFS(idisp1, - DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin")), +SND_SOC_DAILINK_DEF(idisp1_cpu, + DAILINK_COMP_ARRAY(COMP_CPU("iDisp1 Pin"))); +SND_SOC_DAILINK_DEF(idisp1_codec, DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi1"))); -SND_SOC_DAILINK_DEFS(idisp2, - DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin")), +SND_SOC_DAILINK_DEF(idisp2_cpu, + DAILINK_COMP_ARRAY(COMP_CPU("iDisp2 Pin"))); 
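Splitting SND_SOC_DAILINK_DEFS into per-role SND_SOC_DAILINK_DEF entries (the cpu halves here, with the codec halves continuing below) is what makes the SND_SOC_DAILINK_REG(idispN_cpu, idispN_codec, platform) forms further down possible. The variadic DEFS macro, invoked without a platform argument as it was for these links, still emits an idispN_platforms[] array, just a zero-length one, and the one-argument SND_SOC_DAILINK_REG(idispN) wires that into .platforms; any later code that dereferences link->platforms then reads past a zero-sized object. With separate definitions, each HDMI link can instead reuse the file's shared "platform" component. A compile-style sketch of the fixed pattern, using the real dailink helpers from include/sound/soc.h but hypothetical example_*/iDisp9 names:

#include <sound/soc.h>

/*
 * Shared platform component, same shape as this file's own "platform"
 * definition (the PCI address is the HDA bus device these boards use).
 */
SND_SOC_DAILINK_DEF(example_platform,
	DAILINK_COMP_ARRAY(COMP_PLATFORM("0000:00:1f.3")));

/* One DEF per role for a hypothetical iDisp9 link: */
SND_SOC_DAILINK_DEF(example_cpu,
	DAILINK_COMP_ARRAY(COMP_CPU("iDisp9 Pin")));
SND_SOC_DAILINK_DEF(example_codec,
	DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi9")));

static struct snd_soc_dai_link example_link = {
	.name = "iDisp9",
	.id = 9,
	.dpcm_playback = 1,
	.no_pcm = 1,
	/*
	 * Three-argument REG form: cpu, codec and platform arrays are
	 * all explicit, so .platforms can no longer end up pointing at
	 * a zero-length array.
	 */
	SND_SOC_DAILINK_REG(example_cpu, example_codec, example_platform),
};

The hunk below applies exactly this three-argument form to the iDisp1/2/3 links, passing the file's existing "platform" component as the third argument.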
+SND_SOC_DAILINK_DEF(idisp2_codec, DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi2"))); -SND_SOC_DAILINK_DEFS(idisp3, - DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin")), +SND_SOC_DAILINK_DEF(idisp3_cpu, + DAILINK_COMP_ARRAY(COMP_CPU("iDisp3 Pin"))); +SND_SOC_DAILINK_DEF(idisp3_codec, DAILINK_COMP_ARRAY(COMP_CODEC("ehdaudio0D2", "intel-hdmi-hifi3"))); SND_SOC_DAILINK_DEF(analog_cpu, @@ -83,21 +86,21 @@ struct snd_soc_dai_link skl_hda_be_dai_links[HDA_DSP_MAX_BE_DAI_LINKS] = { .id = 1, .dpcm_playback = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(idisp1), + SND_SOC_DAILINK_REG(idisp1_cpu, idisp1_codec, platform), }, { .name = "iDisp2", .id = 2, .dpcm_playback = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(idisp2), + SND_SOC_DAILINK_REG(idisp2_cpu, idisp2_codec, platform), }, { .name = "iDisp3", .id = 3, .dpcm_playback = 1, .no_pcm = 1, - SND_SOC_DAILINK_REG(idisp3), + SND_SOC_DAILINK_REG(idisp3_cpu, idisp3_codec, platform), }, { .name = "Analog Playback and Capture", diff --git a/sound/soc/intel/boards/skl_hda_dsp_generic.c b/sound/soc/intel/boards/skl_hda_dsp_generic.c index 4e45901e3a2f..11eaee9ae41f 100644 --- a/sound/soc/intel/boards/skl_hda_dsp_generic.c +++ b/sound/soc/intel/boards/skl_hda_dsp_generic.c @@ -100,6 +100,8 @@ static struct snd_soc_card hda_soc_card = { .late_probe = skl_hda_card_late_probe, }; +static char hda_soc_components[30]; + #define IDISP_DAI_COUNT 3 #define HDAC_DAI_COUNT 2 #define DMIC_DAI_COUNT 2 @@ -183,6 +185,12 @@ static int skl_hda_audio_probe(struct platform_device *pdev) hda_soc_card.dev = &pdev->dev; snd_soc_card_set_drvdata(&hda_soc_card, ctx); + if (mach->mach_params.dmic_num > 0) { + snprintf(hda_soc_components, sizeof(hda_soc_components), + "cfg-dmics:%d", mach->mach_params.dmic_num); + hda_soc_card.components = hda_soc_components; + } + return devm_snd_soc_register_card(&pdev->dev, &hda_soc_card); } diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index 751b8ea6ae1f..5d878873a8e0 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -35,6 +35,10 @@ #define SOF_RT5682_SSP_AMP(quirk) \ (((quirk) << SOF_RT5682_SSP_AMP_SHIFT) & SOF_RT5682_SSP_AMP_MASK) #define SOF_RT5682_MCLK_BYTCHT_EN BIT(9) +#define SOF_RT5682_NUM_HDMIDEV_SHIFT 10 +#define SOF_RT5682_NUM_HDMIDEV_MASK (GENMASK(12, 10)) +#define SOF_RT5682_NUM_HDMIDEV(quirk) \ + ((quirk << SOF_RT5682_NUM_HDMIDEV_SHIFT) & SOF_RT5682_NUM_HDMIDEV_MASK) /* Default: MCLK on, MCLK 19.2M, SSP0 */ static unsigned long sof_rt5682_quirk = SOF_RT5682_MCLK_EN | @@ -269,19 +273,22 @@ static int sof_card_late_probe(struct snd_soc_card *card) struct snd_soc_component *component = NULL; char jack_name[NAME_SIZE]; struct sof_hdmi_pcm *pcm; - int err = 0; + int err; int i = 0; /* HDMI is not supported by SOF on Baytrail/CherryTrail */ if (is_legacy_cpu) return 0; - pcm = list_first_entry(&ctx->hdmi_pcm_list, struct sof_hdmi_pcm, - head); - component = pcm->codec_dai->component; + if (list_empty(&ctx->hdmi_pcm_list)) + return -EINVAL; - if (ctx->common_hdmi_codec_drv) + if (ctx->common_hdmi_codec_drv) { + pcm = list_first_entry(&ctx->hdmi_pcm_list, struct sof_hdmi_pcm, + head); + component = pcm->codec_dai->component; return hda_dsp_hdmi_build_controls(card, component); + } list_for_each_entry(pcm, &ctx->hdmi_pcm_list, head) { component = pcm->codec_dai->component; @@ -301,8 +308,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) i++; } - if (!component) - return -EINVAL; return hdac_hdmi_jack_port_init(component, &card->dapm); } @@ -594,6 
+599,19 @@ static int sof_audio_probe(struct platform_device *pdev) if (!ctx) return -ENOMEM; + if (pdev->id_entry && pdev->id_entry->driver_data) + sof_rt5682_quirk = (unsigned long)pdev->id_entry->driver_data; + + dmi_check_system(sof_rt5682_quirk_table); + + mach = (&pdev->dev)->platform_data; + + /* A speaker amp might not be present when the quirk claims one is. + * Detect this via whether the machine driver match includes quirk_data. + */ + if ((sof_rt5682_quirk & SOF_SPEAKER_AMP_PRESENT) && !mach->quirk_data) + sof_rt5682_quirk &= ~SOF_SPEAKER_AMP_PRESENT; + if (soc_intel_is_byt() || soc_intel_is_cht()) { is_legacy_cpu = 1; dmic_be_num = 0; @@ -604,11 +622,13 @@ static int sof_audio_probe(struct platform_device *pdev) SOF_RT5682_SSP_CODEC(2); } else { dmic_be_num = 2; - hdmi_num = 3; + hdmi_num = (sof_rt5682_quirk & SOF_RT5682_NUM_HDMIDEV_MASK) >> + SOF_RT5682_NUM_HDMIDEV_SHIFT; + /* default number of HDMI DAI's */ + if (!hdmi_num) + hdmi_num = 3; } - dmi_check_system(sof_rt5682_quirk_table); - /* need to get main clock from pmc */ if (sof_rt5682_quirk & SOF_RT5682_MCLK_BYTCHT_EN) { ctx->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3"); @@ -652,7 +672,6 @@ static int sof_audio_probe(struct platform_device *pdev) INIT_LIST_HEAD(&ctx->hdmi_pcm_list); sof_audio_card_rt5682.dev = &pdev->dev; - mach = (&pdev->dev)->platform_data; /* set platform name for each dailink */ ret = snd_soc_fixup_dai_links_platform_name(&sof_audio_card_rt5682, @@ -683,6 +702,21 @@ static int sof_rt5682_remove(struct platform_device *pdev) return 0; } +static const struct platform_device_id board_ids[] = { + { + .name = "sof_rt5682", + }, + { + .name = "tgl_max98357a_rt5682", + .driver_data = (kernel_ulong_t)(SOF_RT5682_MCLK_EN | + SOF_RT5682_SSP_CODEC(0) | + SOF_SPEAKER_AMP_PRESENT | + SOF_RT5682_SSP_AMP(1) | + SOF_RT5682_NUM_HDMIDEV(4)), + }, + { } +}; + static struct platform_driver sof_audio = { .probe = sof_audio_probe, .remove = sof_rt5682_remove, @@ -690,6 +724,7 @@ static struct platform_driver sof_audio = { .name = "sof_rt5682", .pm = &snd_soc_pm_ops, }, + .id_table = board_ids, }; module_platform_driver(sof_audio) @@ -699,3 +734,4 @@ MODULE_AUTHOR("Bard Liao "); MODULE_AUTHOR("Sathya Prakash M R "); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:sof_rt5682"); +MODULE_ALIAS("platform:tgl_max98357a_rt5682"); diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 3466675f2678..a15aa2ffa681 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -34,8 +34,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, int i; ssize_t ret = 0; - for (i = 0; i < max_pin; i++) - ret += snprintf(buf + size, MOD_BUF - size, + for (i = 0; i < max_pin; i++) { + ret += scnprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" "\tState %d\n\tIndex %d\n", @@ -45,13 +45,15 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, m_pin[i].in_use ? "Used" : "Unused", m_pin[i].is_dynamic ? 
"Dynamic" : "Static", m_pin[i].pin_state, i); + size += ret; + } return ret; } static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf, ssize_t size, bool direction) { - return snprintf(buf + size, MOD_BUF - size, + return scnprintf(buf + size, MOD_BUF - size, "%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t" "Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t" "Sample Type %d\n\tCh Map %#x\n", @@ -75,16 +77,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" + ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" "\tInstance id %d\n\tPvt_id %d\n", mconfig->guid, mconfig->id.module_id, mconfig->id.instance_id, mconfig->id.pvt_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Resources:\n\tCPC %#x\n\tIBS %#x\n\tOBS %#x\t\n", res->cpc, res->ibs, res->obs); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module data:\n\tCore %d\n\tIn queue %d\n\t" "Out queue %d\n\tType %s\n", mconfig->core_id, mconfig->max_in_queue, @@ -94,38 +96,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true); ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Fixup:\n\tParams %#x\n\tConverter %#x\n", mconfig->params_fixup, mconfig->converter); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n", mconfig->dev_type, mconfig->vbus_id, mconfig->hw_conn_type, mconfig->time_slot); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t" "Pages %#x\n", mconfig->pipe->ppl_id, mconfig->pipe->pipe_priority, mconfig->pipe->conn_type, mconfig->pipe->memory_pages); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n", mconfig->pipe->p_params->host_dma_id, mconfig->pipe->p_params->link_dma_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n", mconfig->pipe->p_params->ch, mconfig->pipe->p_params->s_freq, mconfig->pipe->p_params->s_fmt); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tLink %#x\n\tStream %#x\n", mconfig->pipe->p_params->linktype, mconfig->pipe->p_params->stream); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tState %d\n\tPassthru %s\n", mconfig->pipe->state, mconfig->pipe->passthru ? 
"true" : "false"); @@ -135,7 +137,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_pins(mconfig->m_out_pin, buf, mconfig->max_out_queue, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Other:\n\tDomain %d\n\tHomogeneous Input %s\n\t" "Homogeneous Output %s\n\tIn Queue Mask %d\n\t" "Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t" @@ -191,7 +193,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf, __ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2); for (offset = 0; offset < FW_REG_SIZE; offset += 16) { - ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); + ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4, tmp + ret, FW_REG_BUF - ret, 0); ret += strlen(tmp + ret); diff --git a/sound/soc/intel/skylake/skl-ssp-clk.c b/sound/soc/intel/skylake/skl-ssp-clk.c index 1c0e5226cb5b..bd43885f3805 100644 --- a/sound/soc/intel/skylake/skl-ssp-clk.c +++ b/sound/soc/intel/skylake/skl-ssp-clk.c @@ -384,9 +384,11 @@ static int skl_clk_dev_probe(struct platform_device *pdev) &clks[i], clk_pdata, i); if (IS_ERR(data->clk[data->avail_clk_cnt])) { - ret = PTR_ERR(data->clk[data->avail_clk_cnt++]); + ret = PTR_ERR(data->clk[data->avail_clk_cnt]); goto err_unreg_skl_clk; } + + data->avail_clk_cnt++; } platform_set_drvdata(pdev, data); diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index d6f3eefb8f09..200bddf159a2 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -113,10 +113,12 @@ int axg_fifo_pcm_hw_params(struct snd_soc_component *component, { struct snd_pcm_runtime *runtime = ss->runtime; struct axg_fifo *fifo = axg_fifo_data(ss); + unsigned int burst_num, period, threshold; dma_addr_t end_ptr; - unsigned int burst_num; int ret; + period = params_period_bytes(params); + ret = snd_pcm_lib_malloc_pages(ss, params_buffer_bytes(params)); if (ret < 0) return ret; @@ -127,9 +129,25 @@ int axg_fifo_pcm_hw_params(struct snd_soc_component *component, regmap_write(fifo->map, FIFO_FINISH_ADDR, end_ptr); /* Setup interrupt periodicity */ - burst_num = params_period_bytes(params) / AXG_FIFO_BURST; + burst_num = period / AXG_FIFO_BURST; regmap_write(fifo->map, FIFO_INT_ADDR, burst_num); + /* + * Start the fifo request on the smallest of the following: + * - Half the fifo size + * - Half the period size + */ + threshold = min(period / 2, + (unsigned int)AXG_FIFO_MIN_DEPTH / 2); + + /* + * With the threshold in bytes, register value is: + * V = (threshold / burst) - 1 + */ + threshold /= AXG_FIFO_BURST; + regmap_field_write(fifo->field_threshold, + threshold ? 
diff --git a/sound/soc/meson/axg-fifo.h b/sound/soc/meson/axg-fifo.h
index cf928d43b558..c442195ba191 100644
--- a/sound/soc/meson/axg-fifo.h
+++ b/sound/soc/meson/axg-fifo.h
@@ -9,7 +9,9 @@
 
 struct clk;
 struct platform_device;
+struct reg_field;
 struct regmap;
+struct regmap_field;
 struct reset_control;
 struct snd_soc_component_driver;
 
@@ -50,8 +52,6 @@ struct snd_soc_pcm_runtime;
 #define CTRL1_STATUS2_SEL_MASK		GENMASK(11, 8)
 #define CTRL1_STATUS2_SEL(x)		((x) << 8)
 #define  STATUS2_SEL_DDR_READ		0
-#define CTRL1_THRESHOLD_MASK		GENMASK(23, 16)
-#define CTRL1_THRESHOLD(x)		((x) << 16)
 #define CTRL1_FRDDR_DEPTH_MASK		GENMASK(31, 24)
 #define CTRL1_FRDDR_DEPTH(x)		((x) << 24)
 #define FIFO_START_ADDR			0x08
@@ -67,12 +67,14 @@ struct axg_fifo {
 	struct regmap *map;
 	struct clk *pclk;
 	struct reset_control *arb;
+	struct regmap_field *field_threshold;
 	int irq;
 };
 
 struct axg_fifo_match_data {
 	const struct snd_soc_component_driver *component_drv;
 	struct snd_soc_dai_driver *dai_drv;
+	struct reg_field field_threshold;
 };
 
 int axg_fifo_pcm_open(struct snd_soc_component *component,
diff --git a/sound/soc/meson/axg-frddr.c b/sound/soc/meson/axg-frddr.c
index 665d75d49d7b..657b05c739b5 100644
--- a/sound/soc/meson/axg-frddr.c
+++ b/sound/soc/meson/axg-frddr.c
@@ -50,7 +50,7 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream,
 				 struct snd_soc_dai *dai)
 {
 	struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai);
-	unsigned int fifo_depth, fifo_threshold;
+	unsigned int fifo_depth;
 	int ret;
 
 	/* Enable pclk to access registers and clock the fifo ip */
@@ -68,11 +68,8 @@ static int axg_frddr_dai_startup(struct snd_pcm_substream *substream,
 	 * Depth and threshold are zero based.
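	 * (Zero based here appears to mean a register value of N configures
	 * N + 1, which is why AXG_FIFO_MIN_CNT - 1 is written just below.)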
*/ fifo_depth = AXG_FIFO_MIN_CNT - 1; - fifo_threshold = (AXG_FIFO_MIN_CNT / 2) - 1; - regmap_update_bits(fifo->map, FIFO_CTRL1, - CTRL1_FRDDR_DEPTH_MASK | CTRL1_THRESHOLD_MASK, - CTRL1_FRDDR_DEPTH(fifo_depth) | - CTRL1_THRESHOLD(fifo_threshold)); + regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_FRDDR_DEPTH_MASK, + CTRL1_FRDDR_DEPTH(fifo_depth)); return 0; } @@ -159,8 +156,9 @@ static const struct snd_soc_component_driver axg_frddr_component_drv = { }; static const struct axg_fifo_match_data axg_frddr_match_data = { - .component_drv = &axg_frddr_component_drv, - .dai_drv = &axg_frddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &axg_frddr_component_drv, + .dai_drv = &axg_frddr_dai_drv }; static const struct snd_soc_dai_ops g12a_frddr_ops = { @@ -283,8 +281,9 @@ static const struct snd_soc_component_driver g12a_frddr_component_drv = { }; static const struct axg_fifo_match_data g12a_frddr_match_data = { - .component_drv = &g12a_frddr_component_drv, - .dai_drv = &g12a_frddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &g12a_frddr_component_drv, + .dai_drv = &g12a_frddr_dai_drv }; /* On SM1, the output selection in on CTRL2 */ @@ -353,8 +352,9 @@ static const struct snd_soc_component_driver sm1_frddr_component_drv = { }; static const struct axg_fifo_match_data sm1_frddr_match_data = { - .component_drv = &sm1_frddr_component_drv, - .dai_drv = &g12a_frddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &sm1_frddr_component_drv, + .dai_drv = &g12a_frddr_dai_drv }; static const struct of_device_id axg_frddr_of_match[] = { diff --git a/sound/soc/meson/axg-toddr.c b/sound/soc/meson/axg-toddr.c index 7fef0b961496..e90f1b3198ee 100644 --- a/sound/soc/meson/axg-toddr.c +++ b/sound/soc/meson/axg-toddr.c @@ -89,7 +89,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct axg_fifo *fifo = snd_soc_dai_get_drvdata(dai); - unsigned int fifo_threshold; int ret; /* Enable pclk to access registers and clock the fifo ip */ @@ -107,11 +106,6 @@ static int axg_toddr_dai_startup(struct snd_pcm_substream *substream, /* Apply single buffer mode to the interface */ regmap_update_bits(fifo->map, FIFO_CTRL0, CTRL0_TODDR_PP_MODE, 0); - /* TODDR does not have a configurable fifo depth */ - fifo_threshold = AXG_FIFO_MIN_CNT - 1; - regmap_update_bits(fifo->map, FIFO_CTRL1, CTRL1_THRESHOLD_MASK, - CTRL1_THRESHOLD(fifo_threshold)); - return 0; } @@ -191,8 +185,9 @@ static const struct snd_soc_component_driver axg_toddr_component_drv = { }; static const struct axg_fifo_match_data axg_toddr_match_data = { - .component_drv = &axg_toddr_component_drv, - .dai_drv = &axg_toddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &axg_toddr_component_drv, + .dai_drv = &axg_toddr_dai_drv }; static const struct snd_soc_dai_ops g12a_toddr_ops = { @@ -230,8 +225,9 @@ static const struct snd_soc_component_driver g12a_toddr_component_drv = { }; static const struct axg_fifo_match_data g12a_toddr_match_data = { - .component_drv = &g12a_toddr_component_drv, - .dai_drv = &g12a_toddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 16, 23), + .component_drv = &g12a_toddr_component_drv, + .dai_drv = &g12a_toddr_dai_drv }; static const char * const sm1_toddr_sel_texts[] = { @@ -300,8 +296,9 @@ static const struct snd_soc_component_driver sm1_toddr_component_drv = { }; static const struct axg_fifo_match_data sm1_toddr_match_data = { - .component_drv = 
&sm1_toddr_component_drv, - .dai_drv = &g12a_toddr_dai_drv + .field_threshold = REG_FIELD(FIFO_CTRL1, 12, 23), + .component_drv = &sm1_toddr_component_drv, + .dai_drv = &g12a_toddr_dai_drv }; static const struct of_device_id axg_toddr_of_match[] = { diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index b94680fb26fa..a770e66b233a 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -452,7 +452,7 @@ int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream) int ret; for_each_rtd_components(rtd, rtdcom, component) { - if (component->driver->ioctl) { + if (component->driver->sync_stop) { ret = component->driver->sync_stop(component, substream); if (ret < 0) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index b6378f025836..ebd785f9aa46 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3888,9 +3888,6 @@ snd_soc_dai_link_event_pre_pmu(struct snd_soc_dapm_widget *w, runtime->rate = params_rate(params); out: - if (ret < 0) - kfree(runtime); - kfree(params); return ret; } @@ -4752,7 +4749,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm) continue; if (w->power) { dapm_seq_insert(w, &down_list, false); - w->power = 0; + w->new_power = 0; powerdown = 1; } } diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index a428ff393ea2..2b5f3b1b062b 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -117,7 +117,6 @@ dmaengine_pcm_set_runtime_hwparams(struct snd_soc_component *component, struct dma_chan *chan = pcm->chan[substream->stream]; struct snd_dmaengine_dai_dma_data *dma_data; struct snd_pcm_hardware hw; - int ret; if (pcm->config && pcm->config->pcm_hardware) return snd_soc_set_runtime_hwparams(substream, @@ -138,12 +137,15 @@ dmaengine_pcm_set_runtime_hwparams(struct snd_soc_component *component, if (pcm->flags & SND_DMAENGINE_PCM_FLAG_NO_RESIDUE) hw.info |= SNDRV_PCM_INFO_BATCH; - ret = snd_dmaengine_pcm_refine_runtime_hwparams(substream, - dma_data, - &hw, - chan); - if (ret) - return ret; + /** + * FIXME: Remove the return value check to align with the code + * before adding snd_dmaengine_pcm_refine_runtime_hwparams + * function. + */ + snd_dmaengine_pcm_refine_runtime_hwparams(substream, + dma_data, + &hw, + chan); return snd_soc_set_runtime_hwparams(substream, &hw); } diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 01e7bc03d92f..8de29f48442f 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -3192,16 +3192,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, unsigned long flags; /* FE state */ - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "[%s - %s]\n", fe->dai_link->name, stream ? 
"Capture" : "Playback"); - offset += snprintf(buf + offset, size - offset, "State: %s\n", + offset += scnprintf(buf + offset, size - offset, "State: %s\n", dpcm_state_string(fe->dpcm[stream].state)); if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), @@ -3209,10 +3209,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_rate(params)); /* BEs state */ - offset += snprintf(buf + offset, size - offset, "Backends:\n"); + offset += scnprintf(buf + offset, size - offset, "Backends:\n"); if (list_empty(&fe->dpcm[stream].be_clients)) { - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " No active DSP links\n"); goto out; } @@ -3222,16 +3222,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "- %s\n", be->dai_link->name); - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " State: %s\n", dpcm_state_string(be->dpcm[stream].state)); if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 92e4f4d08bfa..56a7142f15a0 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -604,9 +604,11 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, ext_ops = tplg->bytes_ext_ops; num_ops = tplg->bytes_ext_ops_count; for (i = 0; i < num_ops; i++) { - if (!sbe->put && ext_ops[i].id == be->ext_ops.put) + if (!sbe->put && + ext_ops[i].id == le32_to_cpu(be->ext_ops.put)) sbe->put = ext_ops[i].put; - if (!sbe->get && ext_ops[i].id == be->ext_ops.get) + if (!sbe->get && + ext_ops[i].id == le32_to_cpu(be->ext_ops.get)) sbe->get = ext_ops[i].get; } @@ -621,11 +623,11 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, num_ops = tplg->io_ops_count; for (i = 0; i < num_ops; i++) { - if (k->put == NULL && ops[i].id == hdr->ops.put) + if (k->put == NULL && ops[i].id == le32_to_cpu(hdr->ops.put)) k->put = ops[i].put; - if (k->get == NULL && ops[i].id == hdr->ops.get) + if (k->get == NULL && ops[i].id == le32_to_cpu(hdr->ops.get)) k->get = ops[i].get; - if (k->info == NULL && ops[i].id == hdr->ops.info) + if (k->info == NULL && ops[i].id == le32_to_cpu(hdr->ops.info)) k->info = ops[i].info; } @@ -638,11 +640,11 @@ static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr, num_ops = ARRAY_SIZE(io_ops); for (i = 0; i < num_ops; i++) { - if (k->put == NULL && ops[i].id == hdr->ops.put) + if (k->put == NULL && ops[i].id == le32_to_cpu(hdr->ops.put)) k->put = ops[i].put; - if (k->get == NULL && ops[i].id == hdr->ops.get) + if (k->get == NULL && ops[i].id == le32_to_cpu(hdr->ops.get)) k->get = ops[i].get; - if (k->info == NULL && ops[i].id == hdr->ops.info) + if (k->info == NULL && ops[i].id == le32_to_cpu(hdr->ops.info)) k->info = ops[i].info; } @@ 
-931,7 +933,7 @@ static int soc_tplg_denum_create_texts(struct soc_enum *se, if (se->dobj.control.dtexts == NULL) return -ENOMEM; - for (i = 0; i < ec->items; i++) { + for (i = 0; i < le32_to_cpu(ec->items); i++) { if (strnlen(ec->texts[i], SNDRV_CTL_ELEM_ID_NAME_MAXLEN) == SNDRV_CTL_ELEM_ID_NAME_MAXLEN) { @@ -1325,7 +1327,7 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_dmixer_create( if (kc[i].name == NULL) goto err_sm; kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; - kc[i].access = mc->hdr.access; + kc[i].access = le32_to_cpu(mc->hdr.access); /* we only support FL/FR channel mapping atm */ sm->reg = tplc_chan_get_reg(tplg, mc->channel, @@ -1337,10 +1339,10 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_dmixer_create( sm->rshift = tplc_chan_get_shift(tplg, mc->channel, SNDRV_CHMAP_FR); - sm->max = mc->max; - sm->min = mc->min; - sm->invert = mc->invert; - sm->platform_max = mc->platform_max; + sm->max = le32_to_cpu(mc->max); + sm->min = le32_to_cpu(mc->min); + sm->invert = le32_to_cpu(mc->invert); + sm->platform_max = le32_to_cpu(mc->platform_max); sm->dobj.index = tplg->index; INIT_LIST_HEAD(&sm->dobj.list); @@ -1401,7 +1403,7 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_denum_create( goto err_se; tplg->pos += (sizeof(struct snd_soc_tplg_enum_control) + - ec->priv.size); + le32_to_cpu(ec->priv.size)); dev_dbg(tplg->dev, " adding DAPM widget enum control %s\n", ec->hdr.name); @@ -1411,7 +1413,7 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_denum_create( if (kc[i].name == NULL) goto err_se; kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; - kc[i].access = ec->hdr.access; + kc[i].access = le32_to_cpu(ec->hdr.access); /* we only support FL/FR channel mapping atm */ se->reg = tplc_chan_get_reg(tplg, ec->channel, SNDRV_CHMAP_FL); @@ -1420,8 +1422,8 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_denum_create( se->shift_r = tplc_chan_get_shift(tplg, ec->channel, SNDRV_CHMAP_FR); - se->items = ec->items; - se->mask = ec->mask; + se->items = le32_to_cpu(ec->items); + se->mask = le32_to_cpu(ec->mask); se->dobj.index = tplg->index; switch (le32_to_cpu(ec->hdr.ops.info)) { @@ -1523,9 +1525,9 @@ static struct snd_kcontrol_new *soc_tplg_dapm_widget_dbytes_create( if (kc[i].name == NULL) goto err_sbe; kc[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; - kc[i].access = be->hdr.access; + kc[i].access = le32_to_cpu(be->hdr.access); - sbe->max = be->max; + sbe->max = le32_to_cpu(be->max); INIT_LIST_HEAD(&sbe->dobj.list); /* map standard io handlers and check for external handlers */ @@ -1906,6 +1908,10 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg, link->num_codecs = 1; link->num_platforms = 1; + link->dobj.index = tplg->index; + link->dobj.ops = tplg->ops; + link->dobj.type = SND_SOC_DOBJ_DAI_LINK; + if (strlen(pcm->pcm_name)) { link->name = kstrdup(pcm->pcm_name, GFP_KERNEL); link->stream_name = kstrdup(pcm->pcm_name, GFP_KERNEL); @@ -1942,9 +1948,6 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg, goto err; } - link->dobj.index = tplg->index; - link->dobj.ops = tplg->ops; - link->dobj.type = SND_SOC_DOBJ_DAI_LINK; list_add(&link->dobj.list, &tplg->comp->dobj_list); return 0; @@ -2332,8 +2335,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg, } ret = soc_tplg_link_config(tplg, _link); - if (ret < 0) + if (ret < 0) { + if (!abi_match) + kfree(_link); return ret; + } /* offset by version-specific struct size and * real priv data size @@ -2497,7 +2503,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, { struct snd_soc_tplg_manifest 
 						     *manifest, *_manifest;
 	bool abi_match;
-	int err;
+	int ret = 0;
 
 	if (tplg->pass != SOC_TPLG_PASS_MANIFEST)
 		return 0;
@@ -2510,19 +2516,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg,
 		_manifest = manifest;
 	} else {
 		abi_match = false;
-		err = manifest_new_ver(tplg, manifest, &_manifest);
-		if (err < 0)
-			return err;
+		ret = manifest_new_ver(tplg, manifest, &_manifest);
+		if (ret < 0)
+			return ret;
 	}
 
 	/* pass control to component driver for optional further init */
 	if (tplg->comp && tplg->ops && tplg->ops->manifest)
-		return tplg->ops->manifest(tplg->comp, tplg->index, _manifest);
+		ret = tplg->ops->manifest(tplg->comp, tplg->index, _manifest);
 
 	if (!abi_match)	/* free the duplicated one */
 		kfree(_manifest);
 
-	return 0;
+	return ret;
 }
 
 /* validate header magic, size and type */
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 805918d3bcc0..a06a54f423dd 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -306,6 +306,46 @@ static int sof_machine_check(struct snd_sof_dev *sdev)
 #endif
 }
 
+/*
+ * FW Boot State Transition Diagram
+ *
+ *	NOT_STARTED  -> PREPARE		(DSP Probe OK)
+ *	PREPARE      -> IN_PROGRESS	(FW Loading OK)
+ *	IN_PROGRESS  -> COMPLETE	(FW Boot OK, FW_READY OK)
+ *	IN_PROGRESS  -> BOOT_FAILED	(FW Boot Timeout)
+ *	IN_PROGRESS  -> READY_FAILED	(FW_READY FAIL)
+ *	PREPARE      -> NOT_STARTED	(System Suspend/Runtime Suspend)
+ *	COMPLETE     -> NOT_STARTED	(System Suspend/Runtime Suspend)
+ *	BOOT_FAILED  -> NOT_STARTED	(System Suspend/Runtime Suspend)
+ *	READY_FAILED -> NOT_STARTED	(System Suspend/Runtime Suspend)
+ */
+
 static int sof_probe_continue(struct snd_sof_dev *sdev)
 {
 	struct snd_sof_pdata *plat_data = sdev->pdata;
@@ -321,6 +361,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
 		return ret;
 	}
 
+	sdev->fw_state = SOF_FW_BOOT_PREPARE;
+
 	/* check machine info */
 	ret = sof_machine_check(sdev);
 	if (ret < 0) {
@@ -360,7 +402,12 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
 		goto fw_load_err;
 	}
 
-	/* boot the firmware */
+	sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS;
+
+	/*
+	 * Boot the firmware. The FW boot status will be modified
+	 * in snd_sof_run_firmware() depending on the outcome.
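+	 * A boot timeout leaves SOF_FW_BOOT_FAILED, a failed FW_READY
+	 * handshake leaves SOF_FW_BOOT_READY_FAILED, and success leaves
+	 * SOF_FW_BOOT_COMPLETE (see the transition list above).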
+ */ ret = snd_sof_run_firmware(sdev); if (ret < 0) { dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n", @@ -394,7 +441,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (ret < 0) { dev_err(sdev->dev, "error: failed to register DSP DAI driver %d\n", ret); - goto fw_run_err; + goto fw_trace_err; } drv_name = plat_data->machine->drv_name; @@ -408,7 +455,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) if (IS_ERR(plat_data->pdev_mach)) { ret = PTR_ERR(plat_data->pdev_mach); - goto fw_run_err; + goto fw_trace_err; } dev_dbg(sdev->dev, "created machine %s\n", @@ -419,7 +466,8 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) return 0; -#if !IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE) +fw_trace_err: + snd_sof_free_trace(sdev); fw_run_err: snd_sof_fw_unload(sdev); fw_load_err: @@ -428,21 +476,10 @@ static int sof_probe_continue(struct snd_sof_dev *sdev) snd_sof_free_debug(sdev); dbg_err: snd_sof_remove(sdev); -#else - - /* - * when the probe_continue is handled in a work queue, the - * probe does not fail so we don't release resources here. - * They will be released with an explicit call to - * snd_sof_device_remove() when the PCI/ACPI device is removed - */ -fw_run_err: -fw_load_err: -ipc_err: -dbg_err: - -#endif + /* all resources freed, update state to match */ + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; + sdev->first_boot = true; return ret; } @@ -476,6 +513,7 @@ int snd_sof_device_probe(struct device *dev, struct snd_sof_pdata *plat_data) sdev->pdata = plat_data; sdev->first_boot = true; + sdev->fw_state = SOF_FW_BOOT_NOT_STARTED; dev_set_drvdata(dev, sdev); /* check all mandatory ops */ @@ -524,10 +562,12 @@ int snd_sof_device_remove(struct device *dev) if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) cancel_work_sync(&sdev->probe_work); - snd_sof_fw_unload(sdev); - snd_sof_ipc_free(sdev); - snd_sof_free_debug(sdev); - snd_sof_free_trace(sdev); + if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) { + snd_sof_fw_unload(sdev); + snd_sof_ipc_free(sdev); + snd_sof_free_debug(sdev); + snd_sof_free_trace(sdev); + } /* * Unregister machine driver. This will unbind the snd_card which @@ -543,7 +583,8 @@ int snd_sof_device_remove(struct device *dev) * scheduled on, when they are unloaded. Therefore, the DSP must be * removed only after the topology has been unloaded. 
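 * (With this patch the teardown is also skipped entirely when fw_state
 * is still SOF_FW_BOOT_NOT_STARTED, i.e. the DSP was never probed.)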
*/ - snd_sof_remove(sdev); + if (sdev->fw_state > SOF_FW_BOOT_NOT_STARTED) + snd_sof_remove(sdev); /* release firmware */ release_firmware(pdata->fw); diff --git a/sound/soc/sof/intel/apl.c b/sound/soc/sof/intel/apl.c index 7daa8eb456c8..6f45e14f2b2e 100644 --- a/sound/soc/sof/intel/apl.c +++ b/sound/soc/sof/intel/apl.c @@ -41,7 +41,6 @@ const struct snd_sof_dsp_ops sof_apl_ops = { .block_write = sof_block_write, /* doorbell */ - .irq_handler = hda_dsp_ipc_irq_handler, .irq_thread = hda_dsp_ipc_irq_thread, /* ipc */ diff --git a/sound/soc/sof/intel/cnl.c b/sound/soc/sof/intel/cnl.c index 0e1e265f3f3b..9bd169e2691e 100644 --- a/sound/soc/sof/intel/cnl.c +++ b/sound/soc/sof/intel/cnl.c @@ -106,10 +106,6 @@ static irqreturn_t cnl_ipc_irq_thread(int irq, void *context) "nothing to do in IPC IRQ thread\n"); } - /* re-enable IPC interrupt */ - snd_sof_dsp_update_bits(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPIC, - HDA_DSP_ADSPIC_IPC, HDA_DSP_ADSPIC_IPC); - return IRQ_HANDLED; } @@ -231,7 +227,6 @@ const struct snd_sof_dsp_ops sof_cnl_ops = { .block_write = sof_block_write, /* doorbell */ - .irq_handler = hda_dsp_ipc_irq_handler, .irq_thread = cnl_ipc_irq_thread, /* ipc */ diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index 896d21984b73..3f645200d3a5 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -261,14 +261,11 @@ static int hda_link_pcm_prepare(struct snd_pcm_substream *substream, { struct hdac_ext_stream *link_dev = snd_soc_dai_get_dma_data(dai, substream); - struct sof_intel_hda_stream *hda_stream; struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(dai->component); struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream); int stream = substream->stream; - hda_stream = hstream_to_sof_hda_stream(link_dev); - if (link_dev->link_prepared) return 0; @@ -446,6 +443,10 @@ struct snd_soc_dai_driver skl_dai[] = { .name = "iDisp3 Pin", .ops = &hda_link_dai_ops, }, +{ + .name = "iDisp4 Pin", + .ops = &hda_link_dai_ops, +}, { .name = "Analog CPU DAI", .ops = &hda_link_dai_ops, diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c index 0fd2153c1769..1837f66e361f 100644 --- a/sound/soc/sof/intel/hda-ipc.c +++ b/sound/soc/sof/intel/hda-ipc.c @@ -230,22 +230,15 @@ irqreturn_t hda_dsp_ipc_irq_thread(int irq, void *context) "nothing to do in IPC IRQ thread\n"); } - /* re-enable IPC interrupt */ - snd_sof_dsp_update_bits(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPIC, - HDA_DSP_ADSPIC_IPC, HDA_DSP_ADSPIC_IPC); - return IRQ_HANDLED; } -/* is this IRQ for ADSP ? - we only care about IPC here */ -irqreturn_t hda_dsp_ipc_irq_handler(int irq, void *context) +/* Check if an IPC IRQ occurred */ +bool hda_dsp_check_ipc_irq(struct snd_sof_dev *sdev) { - struct snd_sof_dev *sdev = context; - int ret = IRQ_NONE; + bool ret = false; u32 irq_status; - spin_lock(&sdev->hw_lock); - /* store status */ irq_status = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPIS); dev_vdbg(sdev->dev, "irq handler: irq_status:0x%x\n", irq_status); @@ -255,16 +248,10 @@ irqreturn_t hda_dsp_ipc_irq_handler(int irq, void *context) goto out; /* IPC message ? 
*/ - if (irq_status & HDA_DSP_ADSPIS_IPC) { - /* disable IPC interrupt */ - snd_sof_dsp_update_bits_unlocked(sdev, HDA_DSP_BAR, - HDA_DSP_REG_ADSPIC, - HDA_DSP_ADSPIC_IPC, 0); - ret = IRQ_WAKE_THREAD; - } + if (irq_status & HDA_DSP_ADSPIS_IPC) + ret = true; out: - spin_unlock(&sdev->hw_lock); return ret; } diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c index bae7ac3581e5..8852184a2569 100644 --- a/sound/soc/sof/intel/hda-loader.c +++ b/sound/soc/sof/intel/hda-loader.c @@ -295,7 +295,6 @@ int hda_dsp_cl_boot_firmware(struct snd_sof_dev *sdev) /* init for booting wait */ init_waitqueue_head(&sdev->boot_wait); - sdev->boot_complete = false; /* prepare DMA for code loader stream */ tag = cl_stream_prepare(sdev, 0x40, stripped_firmware.size, diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c index 29ab43281670..927a36f92c24 100644 --- a/sound/soc/sof/intel/hda-stream.c +++ b/sound/soc/sof/intel/hda-stream.c @@ -549,22 +549,23 @@ int hda_dsp_stream_hw_free(struct snd_sof_dev *sdev, return 0; } -irqreturn_t hda_dsp_stream_interrupt(int irq, void *context) +bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev) { - struct hdac_bus *bus = context; - int ret = IRQ_WAKE_THREAD; + struct hdac_bus *bus = sof_to_bus(sdev); + bool ret = false; u32 status; - spin_lock(&bus->reg_lock); + /* The function can be called at irq thread, so use spin_lock_irq */ + spin_lock_irq(&bus->reg_lock); status = snd_hdac_chip_readl(bus, INTSTS); dev_vdbg(bus->dev, "stream irq, INTSTS status: 0x%x\n", status); - /* Register inaccessible, ignore it.*/ - if (status == 0xffffffff) - ret = IRQ_NONE; + /* if Register inaccessible, ignore it.*/ + if (status != 0xffffffff) + ret = true; - spin_unlock(&bus->reg_lock); + spin_unlock_irq(&bus->reg_lock); return ret; } @@ -602,7 +603,8 @@ static bool hda_dsp_stream_check(struct hdac_bus *bus, u32 status) irqreturn_t hda_dsp_stream_threaded_handler(int irq, void *context) { - struct hdac_bus *bus = context; + struct snd_sof_dev *sdev = context; + struct hdac_bus *bus = sof_to_bus(sdev); #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA) u32 rirb_status; #endif diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 91bd88fddac7..a1780259292f 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -168,7 +168,7 @@ void hda_dsp_dump_skl(struct snd_sof_dev *sdev, u32 flags) panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_ADSP_ERROR_CODE_SKL + 0x4); - if (sdev->boot_complete) { + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, HDA_DSP_STACK_DUMP_SIZE); snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, @@ -195,7 +195,7 @@ void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags) HDA_DSP_SRAM_REG_FW_STATUS); panic = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_SRAM_REG_FW_TRACEP); - if (sdev->boot_complete) { + if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) { hda_dsp_get_registers(sdev, &xoops, &panic_info, stack, HDA_DSP_STACK_DUMP_SIZE); snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, @@ -351,7 +351,7 @@ static int hda_init_caps(struct snd_sof_dev *sdev) const char *tplg_filename; const char *idisp_str; const char *dmic_str; - int dmic_num; + int dmic_num = 0; int codec_num = 0; int i; #endif @@ -472,6 +472,7 @@ static int hda_init_caps(struct snd_sof_dev *sdev) mach_params->codec_mask = bus->codec_mask; mach_params->platform = dev_name(sdev->dev); mach_params->common_hdmi_codec_drv = hda_codec_use_common_hdmi; + 
mach_params->dmic_num = dmic_num; } /* create codec instances */ @@ -499,6 +500,49 @@ static const struct sof_intel_dsp_desc return chip_info; } +static irqreturn_t hda_dsp_interrupt_handler(int irq, void *context) +{ + struct snd_sof_dev *sdev = context; + + /* + * Get global interrupt status. It includes all hardware interrupt + * sources in the Intel HD Audio controller. + */ + if (snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, SOF_HDA_INTSTS) & + SOF_HDA_INTSTS_GIS) { + + /* disable GIE interrupt */ + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, + SOF_HDA_INTCTL, + SOF_HDA_INT_GLOBAL_EN, + 0); + + return IRQ_WAKE_THREAD; + } + + return IRQ_NONE; +} + +static irqreturn_t hda_dsp_interrupt_thread(int irq, void *context) +{ + struct snd_sof_dev *sdev = context; + + /* deal with streams and controller first */ + if (hda_dsp_check_stream_irq(sdev)) + hda_dsp_stream_threaded_handler(irq, sdev); + + if (hda_dsp_check_ipc_irq(sdev)) + sof_ops(sdev)->irq_thread(irq, sdev); + + /* enable GIE interrupt */ + snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, + SOF_HDA_INTCTL, + SOF_HDA_INT_GLOBAL_EN, + SOF_HDA_INT_GLOBAL_EN); + + return IRQ_HANDLED; +} + int hda_dsp_probe(struct snd_sof_dev *sdev) { struct pci_dev *pci = to_pci_dev(sdev->dev); @@ -603,9 +647,7 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) */ if (hda_use_msi && pci_alloc_irq_vectors(pci, 1, 1, PCI_IRQ_MSI) > 0) { dev_info(sdev->dev, "use msi interrupt mode\n"); - hdev->irq = pci_irq_vector(pci, 0); - /* ipc irq number is the same of hda irq */ - sdev->ipc_irq = hdev->irq; + sdev->ipc_irq = pci_irq_vector(pci, 0); /* initialised to "false" by kzalloc() */ sdev->msi_enabled = true; } @@ -616,28 +658,17 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) * in IO-APIC mode, hda->irq and ipc_irq are using the same * irq number of pci->irq */ - hdev->irq = pci->irq; sdev->ipc_irq = pci->irq; } - dev_dbg(sdev->dev, "using HDA IRQ %d\n", hdev->irq); - ret = request_threaded_irq(hdev->irq, hda_dsp_stream_interrupt, - hda_dsp_stream_threaded_handler, - IRQF_SHARED, "AudioHDA", bus); - if (ret < 0) { - dev_err(sdev->dev, "error: failed to register HDA IRQ %d\n", - hdev->irq); - goto free_irq_vector; - } - dev_dbg(sdev->dev, "using IPC IRQ %d\n", sdev->ipc_irq); - ret = request_threaded_irq(sdev->ipc_irq, hda_dsp_ipc_irq_handler, - sof_ops(sdev)->irq_thread, IRQF_SHARED, - "AudioDSP", sdev); + ret = request_threaded_irq(sdev->ipc_irq, hda_dsp_interrupt_handler, + hda_dsp_interrupt_thread, + IRQF_SHARED, "AudioDSP", sdev); if (ret < 0) { dev_err(sdev->dev, "error: failed to register IPC IRQ %d\n", sdev->ipc_irq); - goto free_hda_irq; + goto free_irq_vector; } pci_set_master(pci); @@ -668,8 +699,6 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) free_ipc_irq: free_irq(sdev->ipc_irq, sdev); -free_hda_irq: - free_irq(hdev->irq, bus); free_irq_vector: if (sdev->msi_enabled) pci_free_irq_vectors(pci); @@ -715,7 +744,6 @@ int hda_dsp_remove(struct snd_sof_dev *sdev) SOF_HDA_PPCTL_GPROCEN, 0); free_irq(sdev->ipc_irq, sdev); - free_irq(hda->irq, bus); if (sdev->msi_enabled) pci_free_irq_vectors(pci); diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 18d7e72bf9b7..de0115294c74 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -43,11 +43,14 @@ /* SOF_HDA_GCTL register bist */ #define SOF_HDA_GCTL_RESET BIT(0) -/* SOF_HDA_INCTL and SOF_HDA_INTSTS regs */ +/* SOF_HDA_INCTL regs */ #define SOF_HDA_INT_GLOBAL_EN BIT(31) #define SOF_HDA_INT_CTRL_EN BIT(30) #define SOF_HDA_INT_ALL_STREAM 0xff +/* SOF_HDA_INTSTS regs */ +#define 
SOF_HDA_INTSTS_GIS		BIT(31)
+
 #define SOF_HDA_MAX_CAPS 10
 #define SOF_HDA_CAP_ID_OFF 16
 #define SOF_HDA_CAP_ID_MASK GENMASK(SOF_HDA_CAP_ID_OFF + 11,\
@@ -345,7 +348,7 @@
 
 /* Number of DAIs */
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
-#define SOF_SKL_NUM_DAIS		14
+#define SOF_SKL_NUM_DAIS		15
 #else
 #define SOF_SKL_NUM_DAIS		8
 #endif
@@ -406,8 +409,6 @@ struct sof_intel_hda_dev {
 	/* the maximum number of streams (playback + capture) supported */
 	u32 stream_max;
 
-	int irq;
-
 	/* PM related */
 	bool l1_support_changed;/* during suspend, is L1SEN changed or not */
 
@@ -511,11 +512,12 @@ int hda_dsp_stream_hw_params(struct snd_sof_dev *sdev,
 			     struct snd_pcm_hw_params *params);
 int hda_dsp_stream_trigger(struct snd_sof_dev *sdev,
 			   struct hdac_ext_stream *stream, int cmd);
-irqreturn_t hda_dsp_stream_interrupt(int irq, void *context);
 irqreturn_t hda_dsp_stream_threaded_handler(int irq, void *context);
 int hda_dsp_stream_setup_bdl(struct snd_sof_dev *sdev,
 			     struct snd_dma_buffer *dmab,
 			     struct hdac_stream *stream);
+bool hda_dsp_check_ipc_irq(struct snd_sof_dev *sdev);
+bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev);
 
 struct hdac_ext_stream *
 	hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction);
@@ -540,7 +542,6 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev);
 int hda_dsp_ipc_get_mailbox_offset(struct snd_sof_dev *sdev);
 int hda_dsp_ipc_get_window_offset(struct snd_sof_dev *sdev, u32 id);
 
-irqreturn_t hda_dsp_ipc_irq_handler(int irq, void *context);
 irqreturn_t hda_dsp_ipc_irq_thread(int irq, void *context);
 int hda_dsp_ipc_cmd_done(struct snd_sof_dev *sdev, int dir);
 
diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c
index 5fdfbaa8c4ed..c7a408f36733 100644
--- a/sound/soc/sof/ipc.c
+++ b/sound/soc/sof/ipc.c
@@ -346,19 +346,12 @@ void snd_sof_ipc_msgs_rx(struct snd_sof_dev *sdev)
 		break;
 	case SOF_IPC_FW_READY:
 		/* check for FW boot completion */
-		if (!sdev->boot_complete) {
+		if (sdev->fw_state == SOF_FW_BOOT_IN_PROGRESS) {
 			err = sof_ops(sdev)->fw_ready(sdev, cmd);
-			if (err < 0) {
-				/*
-				 * this indicates a mismatch in ABI
-				 * between the driver and fw
-				 */
-				dev_err(sdev->dev, "error: ABI mismatch %d\n",
-					err);
-			} else {
-				/* firmware boot completed OK */
-				sdev->boot_complete = true;
-			}
+			if (err < 0)
+				sdev->fw_state = SOF_FW_BOOT_READY_FAILED;
+			else
+				sdev->fw_state = SOF_FW_BOOT_COMPLETE;
 
 			/* wake up firmware loader */
 			wake_up(&sdev->boot_wait);
@@ -502,7 +495,7 @@ int snd_sof_ipc_stream_posn(struct snd_sof_dev *sdev,
 
 	/* send IPC to the DSP */
 	err = sof_ipc_tx_message(sdev->ipc,
-				 stream.hdr.cmd, &stream, sizeof(stream), &posn,
+				 stream.hdr.cmd, &stream, sizeof(stream), posn,
 				 sizeof(*posn));
 	if (err < 0) {
 		dev_err(sdev->dev, "error: failed to get stream %d position\n",
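Note: the old u32 boot_complete flag could not distinguish "firmware came up but the FW_READY handshake failed" from a clean boot, which is what the fw_state changes in ipc.c above and loader.c below fix together. The waiting side, reduced to its core (a sketch; the field and state names are taken from the patch, the helper itself is invented):

	static int example_wait_for_boot(struct snd_sof_dev *sdev)
	{
		/* any state past IN_PROGRESS means the outcome is known */
		if (!wait_event_timeout(sdev->boot_wait,
					sdev->fw_state > SOF_FW_BOOT_IN_PROGRESS,
					msecs_to_jiffies(sdev->boot_timeout))) {
			sdev->fw_state = SOF_FW_BOOT_FAILED;	/* timed out */
			return -EIO;
		}

		/* the IPC rx path set COMPLETE or READY_FAILED before waking us */
		return sdev->fw_state == SOF_FW_BOOT_COMPLETE ? 0 : -EIO;
	}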
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index 432d12bd4937..31847aa3975d 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -512,7 +512,6 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
 	int init_core_mask;
 
 	init_waitqueue_head(&sdev->boot_wait);
-	sdev->boot_complete = false;
 
 	/* create read-only fw_version debugfs to store boot version info */
 	if (sdev->first_boot) {
@@ -544,19 +543,27 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
 
 	init_core_mask = ret;
 
-	/* now wait for the DSP to boot */
-	ret = wait_event_timeout(sdev->boot_wait, sdev->boot_complete,
+	/*
+	 * now wait for the DSP to boot. There are 3 possible outcomes:
+	 * 1. Boot wait times out indicating FW boot failure.
+	 * 2. FW boots successfully and fw_ready op succeeds.
+	 * 3. FW boots but fw_ready op fails.
+	 */
+	ret = wait_event_timeout(sdev->boot_wait,
+				 sdev->fw_state > SOF_FW_BOOT_IN_PROGRESS,
 				 msecs_to_jiffies(sdev->boot_timeout));
 	if (ret == 0) {
 		dev_err(sdev->dev, "error: firmware boot failure\n");
 		snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX |
 			SOF_DBG_TEXT | SOF_DBG_PCI);
-		/* after this point FW_READY msg should be ignored */
-		sdev->boot_complete = true;
+		sdev->fw_state = SOF_FW_BOOT_FAILED;
 		return -EIO;
 	}
 
-	dev_info(sdev->dev, "firmware boot complete\n");
+	if (sdev->fw_state == SOF_FW_BOOT_COMPLETE)
+		dev_info(sdev->dev, "firmware boot complete\n");
+	else
+		return -EIO; /* FW boots but fw_ready op failed */
 
 	/* perform post fw run operations */
 	ret = snd_sof_dsp_post_fw_run(sdev);
diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index 0fd5567237a8..bc09cb5f458b 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -269,6 +269,10 @@ static int sof_resume(struct device *dev, bool runtime_resume)
 	if (!sof_ops(sdev)->resume || !sof_ops(sdev)->runtime_resume)
 		return 0;
 
+	/* DSP was never successfully started, nothing to resume */
+	if (sdev->first_boot)
+		return 0;
+
 	/*
 	 * if the runtime_resume flag is set, call the runtime_resume routine
 	 * or else call the system resume routine
@@ -283,6 +287,8 @@ static int sof_resume(struct device *dev, bool runtime_resume)
 		return ret;
 	}
 
+	sdev->fw_state = SOF_FW_BOOT_PREPARE;
+
 	/* load the firmware */
 	ret = snd_sof_load_firmware(sdev);
 	if (ret < 0) {
@@ -292,7 +298,12 @@ static int sof_resume(struct device *dev, bool runtime_resume)
 		return ret;
 	}
 
-	/* boot the firmware */
+	sdev->fw_state = SOF_FW_BOOT_IN_PROGRESS;
+
+	/*
+	 * Boot the firmware. The FW boot status will be modified
+	 * in snd_sof_run_firmware() depending on the outcome.
+	 */
 	ret = snd_sof_run_firmware(sdev);
 	if (ret < 0) {
 		dev_err(sdev->dev,
@@ -341,6 +352,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
 	if (!sof_ops(sdev)->suspend)
 		return 0;
 
+	if (sdev->fw_state != SOF_FW_BOOT_COMPLETE)
+		goto power_down;
+
 	/* release trace */
 	snd_sof_release_trace(sdev);
 
@@ -378,6 +392,12 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
 			ret);
 	}
 
+power_down:
+
+	/* return if the DSP was not probed successfully */
+	if (sdev->fw_state == SOF_FW_BOOT_NOT_STARTED)
+		return 0;
+
 	/* power down all DSP cores */
 	if (runtime_suspend)
 		ret = snd_sof_dsp_runtime_suspend(sdev);
@@ -388,6 +408,9 @@ static int sof_suspend(struct device *dev, bool runtime_suspend)
 			"error: failed to power down DSP during suspend %d\n",
 			ret);
 
+	/* reset FW state */
+	sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+
 	return ret;
 }
diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h
index c7c2c70ee4d0..59cc711e99ff 100644
--- a/sound/soc/sof/sof-priv.h
+++ b/sound/soc/sof/sof-priv.h
@@ -387,6 +387,15 @@ struct snd_sof_dai {
 	struct list_head list;	/* list in sdev dai list */
 };
 
+enum snd_sof_fw_state {
+	SOF_FW_BOOT_NOT_STARTED = 0,
+	SOF_FW_BOOT_PREPARE,
+	SOF_FW_BOOT_IN_PROGRESS,
+	SOF_FW_BOOT_FAILED,
+	SOF_FW_BOOT_READY_FAILED, /* firmware booted but fw_ready op failed */
+	SOF_FW_BOOT_COMPLETE,
+};
+
 /*
  * SOF Device Level.
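  * (struct snd_sof_dev below swaps its boot_complete flag for a
  * fw_state member carrying the enum above.)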
*/ @@ -408,7 +417,7 @@ struct snd_sof_dev { /* DSP firmware boot */ wait_queue_head_t boot_wait; - u32 boot_complete; + enum snd_sof_fw_state fw_state; u32 first_boot; /* work queue in case the probe is implemented in two steps */ diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index 30bcd5d3a32a..10eb4b8e8e7e 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -1543,20 +1543,20 @@ static int stm32_sai_sub_probe(struct platform_device *pdev) return ret; } - ret = devm_snd_soc_register_component(&pdev->dev, &stm32_component, - &sai->cpu_dai_drv, 1); + ret = snd_dmaengine_pcm_register(&pdev->dev, conf, 0); + if (ret) { + dev_err(&pdev->dev, "Could not register pcm dma\n"); + return ret; + } + + ret = snd_soc_register_component(&pdev->dev, &stm32_component, + &sai->cpu_dai_drv, 1); if (ret) return ret; if (STM_SAI_PROTOCOL_IS_SPDIF(sai)) conf = &stm32_sai_pcm_config_spdif; - ret = devm_snd_dmaengine_pcm_register(&pdev->dev, conf, 0); - if (ret) { - dev_err(&pdev->dev, "Could not register pcm dma\n"); - return ret; - } - return 0; } @@ -1565,6 +1565,8 @@ static int stm32_sai_sub_remove(struct platform_device *pdev) struct stm32_sai_sub_data *sai = dev_get_drvdata(&pdev->dev); clk_unprepare(sai->pdata->pclk); + snd_dmaengine_pcm_unregister(&pdev->dev); + snd_soc_unregister_component(&pdev->dev); return 0; } diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 55798bc8eae2..686561df8e13 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -80,6 +80,7 @@ #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12) #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8) +#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2) #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4) #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6) #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9) @@ -241,7 +242,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL, - BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT), + SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK, value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT); return 0; diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c index dbed3c5408e7..d59882ec48f1 100644 --- a/sound/soc/tegra/tegra30_i2s.c +++ b/sound/soc/tegra/tegra30_i2s.c @@ -127,7 +127,7 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, struct device *dev = dai->dev; struct tegra30_i2s *i2s = snd_soc_dai_get_drvdata(dai); unsigned int mask, val, reg; - int ret, sample_size, srate, i2sclock, bitcnt, audio_bits; + int ret, sample_size, srate, i2sclock, bitcnt; struct tegra30_ahub_cif_conf cif_conf; if (params_channels(params) != 2) @@ -137,19 +137,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: val = TEGRA30_I2S_CTRL_BIT_SIZE_16; - audio_bits = TEGRA30_AUDIOCIF_BITS_16; sample_size = 16; break; - case SNDRV_PCM_FORMAT_S24_LE: - val = TEGRA30_I2S_CTRL_BIT_SIZE_24; - audio_bits = TEGRA30_AUDIOCIF_BITS_24; - sample_size = 24; - break; - case SNDRV_PCM_FORMAT_S32_LE: - val = TEGRA30_I2S_CTRL_BIT_SIZE_32; - audio_bits = TEGRA30_AUDIOCIF_BITS_32; - sample_size = 32; - break; default: return -EINVAL; } @@ -181,8 +170,8 @@ static int tegra30_i2s_hw_params(struct snd_pcm_substream *substream, cif_conf.threshold = 0; cif_conf.audio_channels = 2; cif_conf.client_channels = 2; - 
cif_conf.audio_bits = audio_bits; - cif_conf.client_bits = audio_bits; + cif_conf.audio_bits = TEGRA30_AUDIOCIF_BITS_16; + cif_conf.client_bits = TEGRA30_AUDIOCIF_BITS_16; cif_conf.expand = 0; cif_conf.stereo_conv = 0; cif_conf.replicate = 0; @@ -317,18 +306,14 @@ static const struct snd_soc_dai_driver tegra30_i2s_dai_template = { .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_96000, - .formats = SNDRV_PCM_FMTBIT_S32_LE | - SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S16_LE, + .formats = SNDRV_PCM_FMTBIT_S16_LE, }, .capture = { .stream_name = "Capture", .channels_min = 2, .channels_max = 2, .rates = SNDRV_PCM_RATE_8000_96000, - .formats = SNDRV_PCM_FMTBIT_S32_LE | - SNDRV_PCM_FMTBIT_S24_LE | - SNDRV_PCM_FMTBIT_S16_LE, + .formats = SNDRV_PCM_FMTBIT_S16_LE, }, .ops = &tegra30_i2s_dai_ops, .symmetric_rates = 1, diff --git a/sound/soc/txx9/txx9aclc.c b/sound/soc/txx9/txx9aclc.c index 33c78d33e5a1..9a55926ebf07 100644 --- a/sound/soc/txx9/txx9aclc.c +++ b/sound/soc/txx9/txx9aclc.c @@ -51,7 +51,6 @@ static int txx9aclc_pcm_hw_params(struct snd_soc_component *component, struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { - struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream); struct snd_pcm_runtime *runtime = substream->runtime; struct txx9aclc_dmadata *dmadata = runtime->private_data; int ret; diff --git a/sound/usb/card.c b/sound/usb/card.c index 9f743ebae615..827fb0bc8b56 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -600,6 +600,10 @@ static int usb_audio_probe(struct usb_interface *intf, } } if (! chip) { + err = snd_usb_apply_boot_quirk_once(dev, intf, quirk, id); + if (err < 0) + goto __error; + /* it's a fresh one. * now look for an empty slot and create a new card instance */ diff --git a/sound/usb/clock.c b/sound/usb/clock.c index 018b1ecb5404..a48313dfa967 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -151,8 +151,34 @@ static int uac_clock_selector_set_val(struct snd_usb_audio *chip, int selector_i return ret; } +/* + * Assume the clock is valid if clock source supports only one single sample + * rate, the terminal is connected directly to it (there is no clock selector) + * and clock type is internal. This is to deal with some Denon DJ controllers + * that always reports that clock is invalid. + */ +static bool uac_clock_source_is_valid_quirk(struct snd_usb_audio *chip, + struct audioformat *fmt, + int source_id) +{ + if (fmt->protocol == UAC_VERSION_2) { + struct uac_clock_source_descriptor *cs_desc = + snd_usb_find_clock_source(chip->ctrl_intf, source_id); + + if (!cs_desc) + return false; + + return (fmt->nr_rates == 1 && + (fmt->clock & 0xff) == cs_desc->bClockID && + (cs_desc->bmAttributes & 0x3) != + UAC_CLOCK_SOURCE_TYPE_EXT); + } + + return false; +} + static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, - int protocol, + struct audioformat *fmt, int source_id) { int err; @@ -160,7 +186,7 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, struct usb_device *dev = chip->dev; u32 bmControls; - if (protocol == UAC_VERSION_3) { + if (fmt->protocol == UAC_VERSION_3) { struct uac3_clock_source_descriptor *cs_desc = snd_usb_find_clock_source_v3(chip->ctrl_intf, source_id); @@ -194,10 +220,14 @@ static bool uac_clock_source_is_valid(struct snd_usb_audio *chip, return false; } - return data ? 
true : false; + if (data) + return true; + else + return uac_clock_source_is_valid_quirk(chip, fmt, source_id); } -static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, +static int __uac_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, int entity_id, unsigned long *visited, bool validate) { struct uac_clock_source_descriptor *source; @@ -217,7 +247,7 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, source = snd_usb_find_clock_source(chip->ctrl_intf, entity_id); if (source) { entity_id = source->bClockID; - if (validate && !uac_clock_source_is_valid(chip, UAC_VERSION_2, + if (validate && !uac_clock_source_is_valid(chip, fmt, entity_id)) { usb_audio_err(chip, "clock source %d is not valid, cannot use\n", @@ -248,8 +278,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, } cur = ret; - ret = __uac_clock_find_source(chip, selector->baCSourceID[ret - 1], - visited, validate); + ret = __uac_clock_find_source(chip, fmt, + selector->baCSourceID[ret - 1], + visited, validate); if (!validate || ret > 0 || !chip->autoclock) return ret; @@ -260,8 +291,9 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, if (i == cur) continue; - ret = __uac_clock_find_source(chip, selector->baCSourceID[i - 1], - visited, true); + ret = __uac_clock_find_source(chip, fmt, + selector->baCSourceID[i - 1], + visited, true); if (ret < 0) continue; @@ -281,14 +313,16 @@ static int __uac_clock_find_source(struct snd_usb_audio *chip, int entity_id, /* FIXME: multipliers only act as pass-thru element for now */ multiplier = snd_usb_find_clock_multiplier(chip->ctrl_intf, entity_id); if (multiplier) - return __uac_clock_find_source(chip, multiplier->bCSourceID, - visited, validate); + return __uac_clock_find_source(chip, fmt, + multiplier->bCSourceID, + visited, validate); return -EINVAL; } -static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, - unsigned long *visited, bool validate) +static int __uac3_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, int entity_id, + unsigned long *visited, bool validate) { struct uac3_clock_source_descriptor *source; struct uac3_clock_selector_descriptor *selector; @@ -307,7 +341,7 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, source = snd_usb_find_clock_source_v3(chip->ctrl_intf, entity_id); if (source) { entity_id = source->bClockID; - if (validate && !uac_clock_source_is_valid(chip, UAC_VERSION_3, + if (validate && !uac_clock_source_is_valid(chip, fmt, entity_id)) { usb_audio_err(chip, "clock source %d is not valid, cannot use\n", @@ -338,7 +372,8 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, } cur = ret; - ret = __uac3_clock_find_source(chip, selector->baCSourceID[ret - 1], + ret = __uac3_clock_find_source(chip, fmt, + selector->baCSourceID[ret - 1], visited, validate); if (!validate || ret > 0 || !chip->autoclock) return ret; @@ -350,8 +385,9 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, if (i == cur) continue; - ret = __uac3_clock_find_source(chip, selector->baCSourceID[i - 1], - visited, true); + ret = __uac3_clock_find_source(chip, fmt, + selector->baCSourceID[i - 1], + visited, true); if (ret < 0) continue; @@ -372,7 +408,8 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, multiplier = snd_usb_find_clock_multiplier_v3(chip->ctrl_intf, entity_id); if (multiplier) 
- return __uac3_clock_find_source(chip, multiplier->bCSourceID, + return __uac3_clock_find_source(chip, fmt, + multiplier->bCSourceID, visited, validate); return -EINVAL; @@ -389,18 +426,18 @@ static int __uac3_clock_find_source(struct snd_usb_audio *chip, int entity_id, * * Returns the clock source UnitID (>=0) on success, or an error. */ -int snd_usb_clock_find_source(struct snd_usb_audio *chip, int protocol, - int entity_id, bool validate) +int snd_usb_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, bool validate) { DECLARE_BITMAP(visited, 256); memset(visited, 0, sizeof(visited)); - switch (protocol) { + switch (fmt->protocol) { case UAC_VERSION_2: - return __uac_clock_find_source(chip, entity_id, visited, + return __uac_clock_find_source(chip, fmt, fmt->clock, visited, validate); case UAC_VERSION_3: - return __uac3_clock_find_source(chip, entity_id, visited, + return __uac3_clock_find_source(chip, fmt, fmt->clock, visited, validate); default: return -EINVAL; @@ -501,8 +538,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, * automatic clock selection if the current clock is not * valid. */ - clock = snd_usb_clock_find_source(chip, fmt->protocol, - fmt->clock, true); + clock = snd_usb_clock_find_source(chip, fmt, true); if (clock < 0) { /* We did not find a valid clock, but that might be * because the current sample rate does not match an @@ -510,8 +546,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, * and we will do another validation after setting the * rate. */ - clock = snd_usb_clock_find_source(chip, fmt->protocol, - fmt->clock, false); + clock = snd_usb_clock_find_source(chip, fmt, false); if (clock < 0) return clock; } @@ -577,7 +612,7 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, int iface, validation: /* validate clock after rate change */ - if (!uac_clock_source_is_valid(chip, fmt->protocol, clock)) + if (!uac_clock_source_is_valid(chip, fmt, clock)) return -ENXIO; return 0; } diff --git a/sound/usb/clock.h b/sound/usb/clock.h index 076e31b79ee0..68df0fbe09d0 100644 --- a/sound/usb/clock.h +++ b/sound/usb/clock.h @@ -6,7 +6,7 @@ int snd_usb_init_sample_rate(struct snd_usb_audio *chip, int iface, struct usb_host_interface *alts, struct audioformat *fmt, int rate); -int snd_usb_clock_find_source(struct snd_usb_audio *chip, int protocol, - int entity_id, bool validate); +int snd_usb_clock_find_source(struct snd_usb_audio *chip, + struct audioformat *fmt, bool validate); #endif /* __USBAUDIO_CLOCK_H */ diff --git a/sound/usb/format.c b/sound/usb/format.c index d79db71305f6..f4f0cf3deaf0 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -296,6 +296,9 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip, case USB_ID(0x0E41, 0x4242): /* Line6 Helix Rack */ case USB_ID(0x0E41, 0x4244): /* Line6 Helix LT */ case USB_ID(0x0E41, 0x4246): /* Line6 HX-Stomp */ + case USB_ID(0x0E41, 0x4248): /* Line6 Helix >= fw 2.82 */ + case USB_ID(0x0E41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */ + case USB_ID(0x0E41, 0x424a): /* Line6 Helix LT >= fw 2.82 */ /* supported rates: 48Khz */ kfree(fp->rate_table); fp->rate_table = kmalloc(sizeof(int), GFP_KERNEL); @@ -322,8 +325,7 @@ static int parse_audio_format_rates_v2v3(struct snd_usb_audio *chip, struct usb_device *dev = chip->dev; unsigned char tmp[2], *data; int nr_triplets, data_size, ret = 0, ret_l6; - int clock = snd_usb_clock_find_source(chip, fp->protocol, - fp->clock, false); + int clock = 
snd_usb_clock_find_source(chip, fp, false); if (clock < 0) { dev_err(&dev->dev, diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b5a3f754a4f1..4f096685ed65 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -305,7 +305,7 @@ static void line6_data_received(struct urb *urb) line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN); - if (done == 0) + if (done <= 0) break; line6->message_length = done; diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c index 8d6eefa0d936..6a70463f82c4 100644 --- a/sound/usb/line6/midibuf.c +++ b/sound/usb/line6/midibuf.c @@ -159,7 +159,7 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, int midi_length_prev = midibuf_message_length(this->command_prev); - if (midi_length_prev > 0) { + if (midi_length_prev > 1) { midi_length = midi_length_prev - 1; repeat = 1; } else diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 6cd4ff09c5ee..d2a050bb8341 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -897,6 +897,15 @@ static int parse_term_proc_unit(struct mixer_build *state, return 0; } +static int parse_term_effect_unit(struct mixer_build *state, + struct usb_audio_term *term, + void *p1, int id) +{ + term->type = UAC3_EFFECT_UNIT << 16; /* virtual type */ + term->id = id; + return 0; +} + static int parse_term_uac2_clock_source(struct mixer_build *state, struct usb_audio_term *term, void *p1, int id) @@ -981,8 +990,7 @@ static int __check_input_term(struct mixer_build *state, int id, UAC3_PROCESSING_UNIT); case PTYPE(UAC_VERSION_2, UAC2_EFFECT_UNIT): case PTYPE(UAC_VERSION_3, UAC3_EFFECT_UNIT): - return parse_term_proc_unit(state, term, p1, id, - UAC3_EFFECT_UNIT); + return parse_term_effect_unit(state, term, p1, id); case PTYPE(UAC_VERSION_1, UAC1_EXTENSION_UNIT): case PTYPE(UAC_VERSION_2, UAC2_EXTENSION_UNIT_V2): case PTYPE(UAC_VERSION_3, UAC3_EXTENSION_UNIT): diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c index 94b903d95afa..74c00c905d24 100644 --- a/sound/usb/mixer_scarlett_gen2.c +++ b/sound/usb/mixer_scarlett_gen2.c @@ -558,11 +558,11 @@ static const struct scarlett2_config /* proprietary request/response format */ struct scarlett2_usb_packet { - u32 cmd; - u16 size; - u16 seq; - u32 error; - u32 pad; + __le32 cmd; + __le16 size; + __le16 seq; + __le32 error; + __le32 pad; u8 data[]; }; @@ -664,11 +664,11 @@ static int scarlett2_usb( "Scarlett Gen 2 USB invalid response; " "cmd tx/rx %d/%d seq %d/%d size %d/%d " "error %d pad %d\n", - le16_to_cpu(req->cmd), le16_to_cpu(resp->cmd), + le32_to_cpu(req->cmd), le32_to_cpu(resp->cmd), le16_to_cpu(req->seq), le16_to_cpu(resp->seq), resp_size, le16_to_cpu(resp->size), - le16_to_cpu(resp->error), - le16_to_cpu(resp->pad)); + le32_to_cpu(resp->error), + le32_to_cpu(resp->pad)); err = -EINVAL; goto unlock; } @@ -687,7 +687,7 @@ static int scarlett2_usb( /* Send SCARLETT2_USB_DATA_CMD SCARLETT2_USB_CONFIG_SAVE */ static void scarlett2_config_save(struct usb_mixer_interface *mixer) { - u32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); + __le32 req = cpu_to_le32(SCARLETT2_USB_CONFIG_SAVE); scarlett2_usb(mixer, SCARLETT2_USB_DATA_CMD, &req, sizeof(u32), @@ -713,11 +713,11 @@ static int scarlett2_usb_set_config( const struct scarlett2_config config_item = scarlett2_config_items[config_item_num]; struct { - u32 offset; - u32 bytes; - s32 value; + __le32 offset; + __le32 bytes; + __le32 value; } __packed req; - u32 req2; + __le32 req2; int err; struct scarlett2_mixer_data *private = 
mixer->private_data; @@ -753,8 +753,8 @@ static int scarlett2_usb_get( int offset, void *buf, int size) { struct { - u32 offset; - u32 size; + __le32 offset; + __le32 size; } __packed req; req.offset = cpu_to_le32(offset); @@ -794,8 +794,8 @@ static int scarlett2_usb_set_mix(struct usb_mixer_interface *mixer, const struct scarlett2_device_info *info = private->info; struct { - u16 mix_num; - u16 data[SCARLETT2_INPUT_MIX_MAX]; + __le16 mix_num; + __le16 data[SCARLETT2_INPUT_MIX_MAX]; } __packed req; int i, j; @@ -850,9 +850,9 @@ static int scarlett2_usb_set_mux(struct usb_mixer_interface *mixer) }; struct { - u16 pad; - u16 num; - u32 data[SCARLETT2_MUX_MAX]; + __le16 pad; + __le16 num; + __le32 data[SCARLETT2_MUX_MAX]; } __packed req; req.pad = 0; @@ -911,9 +911,9 @@ static int scarlett2_usb_get_meter_levels(struct usb_mixer_interface *mixer, u16 *levels) { struct { - u16 pad; - u16 num_meters; - u32 magic; + __le16 pad; + __le16 num_meters; + __le32 magic; } __packed req; u32 resp[SCARLETT2_NUM_METERS]; int i, err; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 0e4eab96e23e..c9e1609296df 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -348,6 +348,10 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs, ep = 0x84; ifnum = 0; goto add_sync_ep_from_ifnum; + case USB_ID(0x07fd, 0x0008): /* MOTU M Series */ + ep = 0x81; + ifnum = 2; + goto add_sync_ep_from_ifnum; case USB_ID(0x0582, 0x01d8): /* BOSS Katana */ /* BOSS Katana amplifiers do not need quirks */ return 0; diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 82184036437b..7448ab07bd36 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1113,6 +1113,31 @@ static int snd_usb_motu_microbookii_boot_quirk(struct usb_device *dev) return err; } +static int snd_usb_motu_m_series_boot_quirk(struct usb_device *dev) +{ + int ret; + + if (snd_usb_pipe_sanity_check(dev, usb_sndctrlpipe(dev, 0))) + return -EINVAL; + ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), + 1, USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0x0, 0, NULL, 0, 1000); + + if (ret < 0) + return ret; + + msleep(2000); + + ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), + 1, USB_TYPE_VENDOR | USB_RECIP_DEVICE, + 0x20, 0, NULL, 0, 1000); + + if (ret < 0) + return ret; + + return 0; +} + /* * Setup quirks */ @@ -1297,6 +1322,19 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev, return 0; } +int snd_usb_apply_boot_quirk_once(struct usb_device *dev, + struct usb_interface *intf, + const struct snd_usb_audio_quirk *quirk, + unsigned int id) +{ + switch (id) { + case USB_ID(0x07fd, 0x0008): /* MOTU M Series */ + return snd_usb_motu_m_series_boot_quirk(dev); + } + + return 0; +} + /* * check if the device uses big-endian samples */ @@ -1402,6 +1440,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ return true; } diff --git a/sound/usb/quirks.h b/sound/usb/quirks.h index a80e0ddd0736..df0355843a4c 100644 --- a/sound/usb/quirks.h +++ b/sound/usb/quirks.h @@ -20,6 +20,11 @@ int snd_usb_apply_boot_quirk(struct usb_device *dev, const struct snd_usb_audio_quirk *quirk, unsigned int usb_id); +int snd_usb_apply_boot_quirk_once(struct usb_device *dev, + struct usb_interface *intf, + const struct snd_usb_audio_quirk *quirk, + unsigned int usb_id); + void snd_usb_set_format_quirk(struct snd_usb_substream 
*subs, struct audioformat *fmt); diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index d1caa8ed9e68..9985fc139487 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -119,7 +119,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw, info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code if (us428->chip_status & USX2Y_STAT_CHIP_INIT) info->chip_ready = 1; - info->version = USX2Y_DRIVER_VERSION; + info->version = USX2Y_DRIVER_VERSION; return 0; } diff --git a/sound/usb/validate.c b/sound/usb/validate.c index 36ae78c3da3d..c43d8c3895a9 100644 --- a/sound/usb/validate.c +++ b/sound/usb/validate.c @@ -110,7 +110,7 @@ static bool validate_processing_unit(const void *p, default: if (v->type == UAC1_EXTENSION_UNIT) return true; /* OK */ - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC_PROCESS_UP_DOWNMIX: case UAC_PROCESS_DOLBY_PROLOGIC: if (d->bLength < len + 1) /* bNrModes */ @@ -125,7 +125,7 @@ static bool validate_processing_unit(const void *p, case UAC_VERSION_2: if (v->type == UAC2_EXTENSION_UNIT_V2) return true; /* OK */ - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC2_PROCESS_UP_DOWNMIX: case UAC2_PROCESS_DOLBY_PROLOCIC: /* SiC! */ if (d->bLength < len + 1) /* bNrModes */ @@ -142,7 +142,7 @@ static bool validate_processing_unit(const void *p, len += 2; /* wClusterDescrID */ break; } - switch (d->wProcessType) { + switch (le16_to_cpu(d->wProcessType)) { case UAC3_PROCESS_UP_DOWNMIX: if (d->bLength < len + 1) /* bNrModes */ return false; diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index 8908c58bd6cd..53adc1762ec0 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -929,7 +929,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 1ef45e55039e..2f017caa678d 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -117,6 +117,25 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) return prog_cnt; } +static int cgroup_has_attached_progs(int cgroup_fd) +{ + enum bpf_attach_type type; + bool no_prog = true; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + int count = count_attached_bpf_progs(cgroup_fd, type); + + if (count < 0 && errno != EINVAL) + return -1; + + if (count > 0) { + no_prog = false; + break; + } + } + + return no_prog ? 
0 : 1; +} static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int level) { @@ -161,6 +180,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { enum bpf_attach_type type; + int has_attached_progs; const char *path; int cgroup_fd; int ret = -1; @@ -192,6 +212,16 @@ static int do_show(int argc, char **argv) goto exit; } + has_attached_progs = cgroup_has_attached_progs(cgroup_fd); + if (has_attached_progs < 0) { + p_err("can't query bpf programs attached to %s: %s", + path, strerror(errno)); + goto exit_cgroup; + } else if (!has_attached_progs) { + ret = 0; + goto exit_cgroup; + } + if (json_output) jsonw_start_array(json_wtr); else @@ -212,6 +242,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); +exit_cgroup: close(cgroup_fd); exit: return ret; @@ -228,7 +259,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftw) { enum bpf_attach_type type; - bool skip = true; + int has_attached_progs; int cgroup_fd; if (typeflag != FTW_D) @@ -240,22 +271,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, return SHOW_TREE_FN_ERR; } - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - int count = count_attached_bpf_progs(cgroup_fd, type); - - if (count < 0 && errno != EINVAL) { - p_err("can't query bpf programs attached to %s: %s", - fpath, strerror(errno)); - close(cgroup_fd); - return SHOW_TREE_FN_ERR; - } - if (count > 0) { - skip = false; - break; - } - } - - if (skip) { + has_attached_progs = cgroup_has_attached_progs(cgroup_fd); + if (has_attached_progs < 0) { + p_err("can't query bpf programs attached to %s: %s", + fpath, strerror(errno)); + close(cgroup_fd); + return SHOW_TREE_FN_ERR; + } else if (!has_attached_progs) { close(cgroup_fd); return 0; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2ce9c5ba1934..9288be1d6bf0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -500,7 +500,7 @@ static int do_dump(int argc, char **argv) buf = (unsigned char *)(info->jited_prog_insns); member_len = info->jited_prog_len; } else { /* DUMP_XLATED */ - if (info->xlated_prog_len == 0) { + if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) { p_err("error retrieving insn dump: kernel.kptr_restrict set?"); goto err_free; } diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 980cb9266718..5e9e781905ed 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -17,7 +17,15 @@ int strtobool(const char *s, bool *res); * However uClibc headers also define __GLIBC__ hence the hack below */ #if defined(__GLIBC__) && !defined(__UCLIBC__) +// pragma diagnostic was introduced in gcc 4.6 +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif extern size_t strlcpy(char *dest, const char *src, size_t size); +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic pop +#endif #endif char *str_error_r(int errnum, char *buf, size_t buflen); diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index ad1b9e646c49..4cf93110c259 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -270,6 +270,7 @@ class ArchX86(Arch): def __init__(self, exit_reasons): self.sc_perf_evt_open = 298 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'exit_reason' 
self.exit_reasons = exit_reasons def debugfs_is_child(self, field): @@ -289,6 +290,7 @@ class ArchPPC(Arch): # numbers depend on the wordsize. char_ptr_size = ctypes.sizeof(ctypes.c_char_p) self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + self.exit_reason_field = 'exit_nr' self.exit_reasons = {} def debugfs_is_child(self, field): @@ -300,6 +302,7 @@ class ArchA64(Arch): def __init__(self): self.sc_perf_evt_open = 241 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'esr_ec' self.exit_reasons = AARCH64_EXIT_REASONS def debugfs_is_child(self, field): @@ -311,6 +314,7 @@ class ArchS390(Arch): def __init__(self): self.sc_perf_evt_open = 331 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = None self.exit_reasons = None def debugfs_is_child(self, field): @@ -541,8 +545,8 @@ class TracepointProvider(Provider): """ filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) - if ARCH.exit_reasons: - filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + if ARCH.exit_reason_field and ARCH.exit_reasons: + filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons) return filters def _get_available_fields(self): diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 11b3885e833e..027b18f7ed8c 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs) size_t name_len = strlen(fs->name); /* name + "_PATH" + '\0' */ char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); mem_toupper(upper_name, name_len); strcpy(&upper_name[name_len], "_PATH"); @@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; - strncpy(fs->path, override_path, sizeof(fs->path)); + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; return true; } diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 97830e46d1a0..68e8702f6f10 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -148,6 +148,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so "versioned in $(VERSION_SCRIPT)." 
>&2; \ readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf -s --wide $(OUTPUT)libbpf.so | \ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3f09772192f1..6d1bb761a544 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1242,15 +1242,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found key [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found key [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %zd.\n", + map_name, map->def.key_size, (ssize_t)sz); return -EINVAL; } map->def.key_size = sz; @@ -1285,15 +1285,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found value [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found value [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %zd.\n", + map_name, map->def.value_size, (ssize_t)sz); return -EINVAL; } map->def.value_size = sz; @@ -1817,7 +1817,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } if (sym->st_value % 8) { - pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); + pr_warn("bad call relo offset: %zu\n", + (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -1859,8 +1860,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %llu\n", - shdr_idx, (__u64)sym->st_value); + pr_warn("map relo failed to find map for sec %u, off %zu\n", + shdr_idx, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -1941,9 +1942,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name) ? 
: ""; - pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), - (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), + (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), GELF_ST_BIND(sym.st_info), sym.st_name, name, insn_idx); @@ -2743,7 +2744,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("[%d] %s: found candidate [%d] %s\n", local_type_id, local_name, i, targ_name); - new_ids = realloc(cand_ids->data, cand_ids->len + 1); + new_ids = reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; @@ -5944,7 +5947,7 @@ struct perf_buffer { size_t mmap_size; struct perf_cpu_buf **cpu_bufs; struct epoll_event *events; - int cpu_cnt; + int cpu_cnt; /* number of allocated CPU buffers */ int epoll_fd; /* perf event FD */ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ }; @@ -6078,11 +6081,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { + const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map = {}; char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; + bool *online = NULL; __u32 map_info_len; - int err, i; + int err, i, j, n; if (page_cnt & (page_cnt - 1)) { pr_warn("page count should be power of two, but is %zu\n", @@ -6151,20 +6156,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - for (i = 0; i < pb->cpu_cnt; i++) { + err = parse_cpu_mask_file(online_cpus_file, &online, &n); + if (err) { + pr_warn("failed to get online CPU mask: %d\n", err); + goto error; + } + + for (i = 0, j = 0; i < pb->cpu_cnt; i++) { struct perf_cpu_buf *cpu_buf; int cpu, map_key; cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; + /* in case user didn't explicitly requested particular CPUs to + * be attached to, skip offline/not present CPUs + */ + if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) + continue; + cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); if (IS_ERR(cpu_buf)) { err = PTR_ERR(cpu_buf); goto error; } - pb->cpu_bufs[i] = cpu_buf; + pb->cpu_bufs[j] = cpu_buf; err = bpf_map_update_elem(pb->map_fd, &map_key, &cpu_buf->fd, 0); @@ -6176,21 +6193,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - pb->events[i].events = EPOLLIN; - pb->events[i].data.ptr = cpu_buf; + pb->events[j].events = EPOLLIN; + pb->events[j].data.ptr = cpu_buf; if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, - &pb->events[i]) < 0) { + &pb->events[j]) < 0) { err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } + j++; } + pb->cpu_cnt = j; + free(online); return pb; error: + free(online); if (pb) perf_buffer__free(pb); return ERR_PTR(err); @@ -6521,61 +6542,104 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } -int libbpf_num_possible_cpus(void) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { - static const char *fcpu = "/sys/devices/system/cpu/possible"; - int len = 0, n = 0, il = 0, ir = 0; - unsigned int start = 0, end = 0; - int tmp_cpus = 0; - static int cpus; - char buf[128]; - int error = 0; - int fd = -1; + int err = 0, n, len, start, end = -1; + bool *tmp; - tmp_cpus = READ_ONCE(cpus); - if (tmp_cpus > 0) - return tmp_cpus; + *mask = NULL; + *mask_sz = 0; + + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ + while (*s) { + if (*s == ',' || *s == '\n') { + s++; + continue; + } + n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); + if (n <= 0 || n > 2) { + pr_warn("Failed to get CPU range %s: %d\n", s, n); + err = -EINVAL; + goto cleanup; + } else if (n == 1) { + end = start; + } + if (start < 0 || start > end) { + pr_warn("Invalid CPU range [%d,%d] in %s\n", + start, end, s); + err = -EINVAL; + goto cleanup; + } + tmp = realloc(*mask, end + 1); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + *mask = tmp; + memset(tmp + *mask_sz, 0, start - *mask_sz); + memset(tmp + start, 1, end - start + 1); + *mask_sz = end + 1; + s += len; + } + if (!*mask_sz) { + pr_warn("Empty CPU range\n"); + return -EINVAL; + } + return 0; +cleanup: + free(*mask); + *mask = NULL; + return err; +} + +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) +{ + int fd, err = 0, len; + char buf[128]; fd = open(fcpu, O_RDONLY); if (fd < 0) { - error = errno; - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); - return -error; + err = -errno; + pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { - error = len ? errno : EINVAL; - pr_warn("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); - return -error; + err = len ? 
-errno : -EINVAL; + pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + return err; } - if (len == sizeof(buf)) { - pr_warn("File %s size overflow\n", fcpu); - return -EOVERFLOW; + if (len >= sizeof(buf)) { + pr_warn("CPU mask is too big in file %s\n", fcpu); + return -E2BIG; } buf[len] = '\0'; - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ - if (buf[ir] == ',' || buf[ir] == '\0') { - buf[ir] = '\0'; - n = sscanf(&buf[il], "%u-%u", &start, &end); - if (n <= 0) { - pr_warn("Failed to get # CPUs from %s\n", - &buf[il]); - return -EINVAL; - } else if (n == 1) { - end = start; - } - tmp_cpus += end - start + 1; - il = ir + 1; - } - } - if (tmp_cpus <= 0) { - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); - return -EINVAL; + return parse_cpu_mask_str(buf, mask, mask_sz); +} + +int libbpf_num_possible_cpus(void) +{ + static const char *fcpu = "/sys/devices/system/cpu/possible"; + static int cpus; + int err, n, i, tmp_cpus; + bool *mask; + + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; + + err = parse_cpu_mask_file(fcpu, &mask, &n); + if (err) + return err; + + tmp_cpus = 0; + for (i = 0; i < n; i++) { + if (mask[i]) + tmp_cpus++; } + free(mask); WRITE_ONCE(cpus, tmp_cpus); return tmp_cpus; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 97ac17a64a58..3683af9dc23e 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -76,7 +76,7 @@ static inline bool libbpf_validate_opts(const char *opts, for (i = opts_sz; i < user_sz; i++) { if (opts[i]) { - pr_warn("%s has non-zero extra bytes", + pr_warn("%s has non-zero extra bytes\n", type_name); return false; } @@ -95,6 +95,8 @@ static inline bool libbpf_validate_opts(const char *opts, #define OPTS_GET(opts, field, fallback_value) \ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); diff --git a/tools/lib/string.c b/tools/lib/string.c index f2ae1b87c719..f645343815de 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -96,6 +96,10 @@ int strtobool(const char *s, bool *res) * If libc has strlcpy() then that version will override this * implementation: */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wignored-attributes" +#endif size_t __weak strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -107,6 +111,9 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size) } return ret; } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif /** * skip_spaces - Removes leading whitespace from @str. 
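A note on the libbpf change above: the old libbpf_num_possible_cpus() merely counted the CPUs listed in /sys/devices/system/cpu/possible, while the new parse_cpu_mask_str()/parse_cpu_mask_file() helpers (declared in libbpf_internal.h) return a bool-per-CPU mask, which is what lets __perf_buffer__new() skip offline CPUs. The following standalone sketch mirrors the parsing scheme those helpers use, expanding comma-separated "N" and "N-M" ranges into a mask; it is an illustration only, not the libbpf source, and the helper name cpu_mask_from_str and the sample input are made up.

/*
 * Standalone illustration (assumed helper name, not libbpf code) of the
 * cpu-list parsing that parse_cpu_mask_str() performs: comma-separated
 * "N" or "N-M" ranges are expanded into a bool-per-CPU mask.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int cpu_mask_from_str(const char *s, bool **mask, int *mask_sz)
{
	int n, len, start, end;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		/* %n records how many characters the range consumed */
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2)
			goto err;
		if (n == 1)
			end = start;
		/* sysfs lists ranges in ascending, non-overlapping order */
		if (start < *mask_sz || start > end)
			goto err;
		tmp = realloc(*mask, end + 1);	/* one byte per CPU */
		if (!tmp)
			goto err;
		*mask = tmp;
		memset(tmp + *mask_sz, 0, start - *mask_sz);	/* gap = clear */
		memset(tmp + start, 1, end - start + 1);	/* range = set */
		*mask_sz = end + 1;
		s += len;
	}
	return *mask_sz ? 0 : -1;
err:
	free(*mask);
	*mask = NULL;
	return -1;
}

int main(void)
{
	bool *mask;
	int i, sz;

	/* sample input in the format of /sys/devices/system/cpu/online */
	if (cpu_mask_from_str("0-1,3\n", &mask, &sz))
		return 1;
	for (i = 0; i < sz; i++)
		printf("cpu%d: %s\n", i, mask[i] ? "set" : "clear");
	free(mask);
	return 0;
}

A mask rather than a count is needed because the online set can be sparse (e.g. "0-1,3" when cpu2 is offline): a perf buffer, epoll slot, and map entry should only be created for indices whose mask bit is set, which is exactly how the reworked __perf_buffer__new() loop uses the j counter above.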
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index d2a19b0bc05a..ee08aeff30a1 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -2,10 +2,6 @@ include ../scripts/Makefile.include include ../scripts/Makefile.arch -ifeq ($(ARCH),x86_64) -ARCH := x86 -endif - # always use the host compiler HOSTAR ?= ar HOSTCC ?= gcc @@ -33,7 +29,7 @@ all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ - -I$(srctree)/tools/arch/$(ARCH)/include + -I$(srctree)/tools/arch/$(SRCARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh index 9bd04bbed01e..2a1261bfbb62 100755 --- a/tools/objtool/sync-check.sh +++ b/tools/objtool/sync-check.sh @@ -48,5 +48,3 @@ check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[ check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"' check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"' check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"' - -cd - diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index ede040cf82ad..20e9a189ad92 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -865,9 +865,12 @@ static int cs_etm_read_finish(struct auxtrace_record *itr, int idx) struct evsel *evsel; evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->cs_etm_pmu->type) + if (evsel->core.attr.type == ptr->cs_etm_pmu->type) { + if (evsel->disabled) + return 0; return perf_evlist__enable_event_idx(ptr->evlist, evsel, idx); + } } return -EINVAL; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index eba6541ec0f1..1d993c27242b 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -165,9 +165,12 @@ static int arm_spe_read_finish(struct auxtrace_record *itr, int idx) struct evsel *evsel; evlist__for_each_entry(sper->evlist, evsel) { - if (evsel->core.attr.type == sper->arm_spe_pmu->type) + if (evsel->core.attr.type == sper->arm_spe_pmu->type) { + if (evsel->disabled) + return 0; return perf_evlist__enable_event_idx(sper->evlist, evsel, idx); + } } return -EINVAL; } diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 27d9e214d068..39e363151ad7 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -420,9 +420,12 @@ static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) struct evsel *evsel; evlist__for_each_entry(btsr->evlist, evsel) { - if (evsel->core.attr.type == btsr->intel_bts_pmu->type) + if (evsel->core.attr.type == btsr->intel_bts_pmu->type) { + if (evsel->disabled) + return 0; return perf_evlist__enable_event_idx(btsr->evlist, evsel, idx); + } } return -EINVAL; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 20df442fdf36..be07d6886256 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -1173,9 +1173,12 @@ static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) struct evsel *evsel; evlist__for_each_entry(ptr->evlist, evsel) { 
- if (evsel->core.attr.type == ptr->intel_pt_pmu->type) + if (evsel->core.attr.type == ptr->intel_pt_pmu->type) { + if (evsel->disabled) + return 0; return perf_evlist__enable_event_idx(ptr->evlist, evsel, idx); + } } return -EINVAL; } diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index df810096abfe..58906e9499bb 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; -static unsigned int ncpus, threads_starting, nthreads = 0; +static unsigned int threads_starting, nthreads = 0; static int futex_flag = 0; static const struct option options[] = { @@ -141,7 +141,7 @@ int bench_futex_wake(int argc, const char **argv) sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e69f44941aad..f2e9d2b1b913 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -595,8 +595,8 @@ tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, { struct c2c_hist_entry *c2c_left; struct c2c_hist_entry *c2c_right; - unsigned int tot_hitm_left; - unsigned int tot_hitm_right; + uint64_t tot_hitm_left; + uint64_t tot_hitm_right; c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); @@ -629,7 +629,8 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ \ c2c_left = container_of(left, struct c2c_hist_entry, he); \ c2c_right = container_of(right, struct c2c_hist_entry, he); \ - return c2c_left->stats.__f - c2c_right->stats.__f; \ + return (uint64_t) c2c_left->stats.__f - \ + (uint64_t) c2c_right->stats.__f; \ } #define STAT_FN(__f) \ @@ -682,7 +683,8 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); - return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); + return (uint64_t) llc_miss(&c2c_left->stats) - + (uint64_t) llc_miss(&c2c_right->stats); } static uint64_t total_records(struct c2c_stats *stats) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index de988589d99b..66cd97cc8b92 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -412,10 +412,10 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; -#ifndef HAVE_LIBUNWIND_SUPPORT +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) if (dwarf_callchain_users) { - ui__warning("Please install libunwind development packages " - "during the perf build.\n"); + ui__warning("Please install libunwind or libdw " + "development packages during the perf build.\n"); } #endif diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d4d3558fdef4..cfc6172ecab7 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3062,6 +3062,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, continue; } + actions->ms.map = map; top = pstack__peek(browser->pstack); if (top == &browser->hists->dso_filter) { /* diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build index ec22e899a224..9b5d5cbb7af7 100644 --- a/tools/perf/ui/gtk/Build +++ 
b/tools/perf/ui/gtk/Build @@ -7,3 +7,8 @@ gtk-y += util.o gtk-y += helpline.o gtk-y += progress.o gtk-y += annotate.o +gtk-y += zalloc.o + +$(OUTPUT)ui/gtk/zalloc.o: ../lib/zalloc.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fdd5bddb3075..f67960bedebb 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -549,6 +549,7 @@ void maps__insert(struct maps *maps, struct map *map) if (maps_by_name == NULL) { __maps__free_maps_by_name(maps); + up_write(&maps->lock); return; } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 2c41d47f6f83..90d23cc3c8d4 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -18,7 +18,6 @@ * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static bool have_frontend_stalled; struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; @@ -144,7 +143,6 @@ void runtime_stat__exit(struct runtime_stat *st) void perf_stat__init_shadow_stats(void) { - have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); runtime_stat__init(&rt_stat); } @@ -853,10 +851,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); - } else if (have_frontend_stalled) { - out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", - "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index 0111d246d1ca..54a2857c2510 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -15,7 +15,7 @@ include $(srctree)/../../scripts/Makefile.include OUTPUT=$(srctree)/ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/power/acpi/ + OUTPUT := $(O)/tools/power/acpi/ endif #$(info Determined 'OUTPUT' to be $(OUTPUT)) diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c index 2f55d4d23446..6e04304560ca 100644 --- a/tools/power/cpupower/lib/cpufreq.c +++ b/tools/power/cpupower/lib/cpufreq.c @@ -332,21 +332,74 @@ void cpufreq_put_available_governors(struct cpufreq_available_governors *any) } -struct cpufreq_frequencies -*cpufreq_get_frequencies(const char *type, unsigned int cpu) +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu) { - struct cpufreq_frequencies *first = NULL; - struct cpufreq_frequencies *current = NULL; + struct cpufreq_available_frequencies *first = NULL; + struct cpufreq_available_frequencies *current = NULL; char one_value[SYSFS_PATH_MAX]; char linebuf[MAX_LINE_LEN]; - char fname[MAX_LINE_LEN]; unsigned int pos, i; unsigned int len; - snprintf(fname, MAX_LINE_LEN, "scaling_%s_frequencies", type); + len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies", + linebuf, sizeof(linebuf)); + if (len == 0) + return NULL; - len = sysfs_cpufreq_read_file(cpu, fname, - linebuf, sizeof(linebuf)); + pos = 0; + for (i = 0; i < len; i++) { + if (linebuf[i] == ' ' || linebuf[i] == '\n') { + if (i - pos < 2) + continue; + if (i - pos >= SYSFS_PATH_MAX) + goto error_out; + if (current) { + current->next = malloc(sizeof(*current)); + if (!current->next) + goto error_out; + current = current->next; + } else { + first = malloc(sizeof(*first)); + if (!first) + goto error_out; + current = first; + } + current->first = first; + 
current->next = NULL; + + memcpy(one_value, linebuf + pos, i - pos); + one_value[i - pos] = '\0'; + if (sscanf(one_value, "%lu", &current->frequency) != 1) + goto error_out; + + pos = i + 1; + } + } + + return first; + + error_out: + while (first) { + current = first->next; + free(first); + first = current; + } + return NULL; +} + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu) +{ + struct cpufreq_available_frequencies *first = NULL; + struct cpufreq_available_frequencies *current = NULL; + char one_value[SYSFS_PATH_MAX]; + char linebuf[MAX_LINE_LEN]; + unsigned int pos, i; + unsigned int len; + + len = sysfs_cpufreq_read_file(cpu, "scaling_boost_frequencies", + linebuf, sizeof(linebuf)); if (len == 0) return NULL; @@ -391,9 +444,9 @@ struct cpufreq_frequencies return NULL; } -void cpufreq_put_frequencies(struct cpufreq_frequencies *any) +void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies *any) { - struct cpufreq_frequencies *tmp, *next; + struct cpufreq_available_frequencies *tmp, *next; if (!any) return; @@ -406,6 +459,11 @@ void cpufreq_put_frequencies(struct cpufreq_frequencies *any) } } +void cpufreq_put_boost_frequencies(struct cpufreq_available_frequencies *any) +{ + cpufreq_put_available_frequencies(any); +} + static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu, const char *file) { diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h index a55f0d19215b..95f4fd9e2656 100644 --- a/tools/power/cpupower/lib/cpufreq.h +++ b/tools/power/cpupower/lib/cpufreq.h @@ -20,10 +20,10 @@ struct cpufreq_available_governors { struct cpufreq_available_governors *first; }; -struct cpufreq_frequencies { +struct cpufreq_available_frequencies { unsigned long frequency; - struct cpufreq_frequencies *next; - struct cpufreq_frequencies *first; + struct cpufreq_available_frequencies *next; + struct cpufreq_available_frequencies *first; }; @@ -124,11 +124,17 @@ void cpufreq_put_available_governors( * cpufreq_put_frequencies after use. 
*/ -struct cpufreq_frequencies -*cpufreq_get_frequencies(const char *type, unsigned int cpu); +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu); -void cpufreq_put_frequencies( - struct cpufreq_frequencies *first); +void cpufreq_put_available_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu); + +void cpufreq_put_boost_frequencies( + struct cpufreq_available_frequencies *first); /* determine affected CPUs diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index e63cf55f81cf..6efc0f6b1b11 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -244,14 +244,14 @@ static int get_boost_mode_x86(unsigned int cpu) static int get_boost_mode(unsigned int cpu) { - struct cpufreq_frequencies *freqs; + struct cpufreq_available_frequencies *freqs; if (cpupower_cpu_info.vendor == X86_VENDOR_AMD || cpupower_cpu_info.vendor == X86_VENDOR_HYGON || cpupower_cpu_info.vendor == X86_VENDOR_INTEL) return get_boost_mode_x86(cpu); - freqs = cpufreq_get_frequencies("boost", cpu); + freqs = cpufreq_get_boost_frequencies(cpu); if (freqs) { printf(_(" boost frequency steps: ")); while (freqs->next) { @@ -261,7 +261,7 @@ static int get_boost_mode(unsigned int cpu) } print_speed(freqs->frequency); printf("\n"); - cpufreq_put_frequencies(freqs); + cpufreq_put_available_frequencies(freqs); } return 0; @@ -475,7 +475,7 @@ static int get_latency(unsigned int cpu, unsigned int human) static void debug_output_one(unsigned int cpu) { - struct cpufreq_frequencies *freqs; + struct cpufreq_available_frequencies *freqs; get_driver(cpu); get_related_cpus(cpu); @@ -483,7 +483,7 @@ static void debug_output_one(unsigned int cpu) get_latency(cpu, 1); get_hardware_limits(cpu, 1); - freqs = cpufreq_get_frequencies("available", cpu); + freqs = cpufreq_get_available_frequencies(cpu); if (freqs) { printf(_(" available frequency steps: ")); while (freqs->next) { @@ -493,7 +493,7 @@ static void debug_output_one(unsigned int cpu) } print_speed(freqs->frequency); printf("\n"); - cpufreq_put_frequencies(freqs); + cpufreq_put_available_frequencies(freqs); } get_available_governors(cpu); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 220d04f958a6..42b6cd41d2ea 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1383,7 +1383,7 @@ sub reboot { } else { # Make sure everything has been written to disk - run_ssh("sync"); + run_ssh("sync", 10); if (defined($time)) { start_monitor; diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index e59eb9e7f923..180ad1e1b04f 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -24,6 +24,8 @@ KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig']) +KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] + class KunitStatus(Enum): SUCCESS = auto() CONFIG_FAILURE = auto() @@ -35,6 +37,13 @@ def create_default_kunitconfig(): shutil.copyfile('arch/um/configs/kunit_defconfig', kunit_kernel.kunitconfig_path) +def get_kernel_root_path(): + parts = sys.argv[0] if not __file__ else __file__ + parts = os.path.realpath(parts).split('tools/testing/kunit') + if len(parts) != 2: + sys.exit(1) + return parts[0] + def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: config_start = time.time() @@ 
-114,6 +123,9 @@ def main(argv, linux=None): cli_args = parser.parse_args(argv) if cli_args.subcommand == 'run': + if get_kernel_root_path(): + os.chdir(get_kernel_root_path()) + if cli_args.build_dir: if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index a83111a32d4a..41ec274633c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -20,7 +20,7 @@ extern char NAME##_data[]; \ extern int NAME##_size; ssize_t get_base_addr() { - size_t start; + size_t start, offset; char buf[256]; FILE *f; @@ -28,10 +28,11 @@ ssize_t get_base_addr() { if (!f) return -errno; - while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) { + while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", + &start, buf, &offset) == 3) { if (strcmp(buf, "r-xp") == 0) { fclose(f); - return start; + return start - offset; } } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 3003fddc0613..cf6c87936c69 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -4,6 +4,7 @@ #include #include #include +#include "libbpf_internal.h" static void on_sample(void *ctx, int cpu, void *data, __u32 size) { @@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void test_perf_buffer(void) { - int err, prog_fd, nr_cpus, i, duration = 0; + int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; const char *prog_name = "kprobe/sys_nanosleep"; const char *file = "./test_perf_buffer.o"; struct perf_buffer_opts pb_opts = {}; @@ -29,15 +30,27 @@ void test_perf_buffer(void) struct bpf_object *obj; struct perf_buffer *pb; struct bpf_link *link; + bool *online; nr_cpus = libbpf_num_possible_cpus(); if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus)) return; + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", + &online, &on_len); + if (CHECK(err, "nr_on_cpus", "err %d\n", err)) + return; + + for (i = 0; i < on_len; i++) + if (online[i]) + nr_on_cpus++; + /* load program */ err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) - return; + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out_close; + } prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) @@ -64,6 +77,11 @@ void test_perf_buffer(void) /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) { + printf("skipping offline CPU #%d\n", i); + continue; + } + CPU_ZERO(&cpu_set); CPU_SET(i, &cpu_set); @@ -81,8 +99,8 @@ void test_perf_buffer(void) if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) goto out_free_pb; - if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt", - "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen))) + if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt", + "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; out_free_pb: @@ -91,4 +109,5 @@ void test_perf_buffer(void) bpf_link__destroy(link); out_close: bpf_object__close(obj); + free(online); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c new file 
mode 100644 index 000000000000..aa43e0bd210c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare + +#include "test_progs.h" + +#define TCP_REPAIR 19 /* TCP sock is under repair right now */ + +#define TCP_REPAIR_ON 1 +#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ + +static int connected_socket_v4(void) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(80), + .sin_addr = { inet_addr("127.0.0.1") }, + }; + socklen_t len = sizeof(addr); + int s, repair, err; + + s = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK_FAIL(s == -1)) + goto error; + + repair = TCP_REPAIR_ON; + err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)); + if (CHECK_FAIL(err)) + goto error; + + err = connect(s, (struct sockaddr *)&addr, len); + if (CHECK_FAIL(err)) + goto error; + + repair = TCP_REPAIR_OFF_NO_WP; + err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)); + if (CHECK_FAIL(err)) + goto error; + + return s; +error: + perror(__func__); + close(s); + return -1; +} + +/* Create a map, populate it with one socket, and free the map. */ +static void test_sockmap_create_update_free(enum bpf_map_type map_type) +{ + const int zero = 0; + int s, map, err; + + s = connected_socket_v4(); + if (CHECK_FAIL(s == -1)) + return; + + map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); + if (CHECK_FAIL(map == -1)) { + perror("bpf_create_map"); + goto out; + } + + err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST); + if (CHECK_FAIL(err)) { + perror("bpf_map_update"); + goto out; + } + +out: + close(map); + close(s); +} + +void test_sockmap_basic(void) +{ + if (test__start_subtest("sockmap create_update_free")) + test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash create_update_free")) + test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH); +} diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f62aa0eb959b..1735faf17536 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -49,8 +49,12 @@ void test_stacktrace_build_id_nmi(void) pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, -1 /* group id */, 0 /* flags */); - if (CHECK(pmu_fd < 0, "perf_event_open", - "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", + if (pmu_fd < 0 && errno == ENOENT) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, errno)) goto close_prog; diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index ea7d84f01235..e6be383a003f 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md) data_check.skb_ports[0] = th->source; data_check.skb_ports[1] = th->dest; + if (th->fin) + /* The connection is being torn down at the end of a + * test. It can't contain a cmd, so return early. 
+ */ + return SK_PASS; + if ((th->doff << 2) + sizeof(*cmd) > data_check.len) GOTO_DONE(DROP_ERR_SKB_DATA); if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy, diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c index 7566c13eb51a..079d0f5a2909 100644 --- a/tools/testing/selftests/bpf/test_select_reuseport.c +++ b/tools/testing/selftests/bpf/test_select_reuseport.c @@ -30,7 +30,7 @@ #define REUSEPORT_ARRAY_SIZE 32 static int result_map, tmp_index_ovr_map, linum_map, data_check_map; -static enum result expected_results[NR_RESULTS]; +static __u32 expected_results[NR_RESULTS]; static int sk_fds[REUSEPORT_ARRAY_SIZE]; static int reuseport_array, outer_map; static int select_by_skb_data_prog; @@ -662,7 +662,19 @@ static void setup_per_test(int type, unsigned short family, bool inany) static void cleanup_per_test(void) { - int i, err; + int i, err, zero = 0; + + memset(expected_results, 0, sizeof(expected_results)); + + for (i = 0; i < NR_RESULTS; i++) { + err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY); + RET_IF(err, "reset elem in result_map", + "i:%u err:%d errno:%d\n", i, err, errno); + } + + err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY); + RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n", + err, errno); for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) close(sk_fds[i]); diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 4a851513c842..779e11da979c 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, FILE *file; int i, fp; - file = fopen(".sendpage_tst.tmp", "w+"); + file = tmpfile(); if (!file) { perror("create file for sendpage"); return 1; @@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, fwrite(&k, sizeof(char), 1, file); fflush(file); fseek(file, 0, SEEK_SET); - fclose(file); - fp = open(".sendpage_tst.tmp", O_RDONLY); - if (fp < 0) { - perror("reopen file for sendpage"); - return 1; - } + fp = fileno(file); clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { @@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, if (!drop && sent < 0) { perror("send loop error"); - close(fp); + fclose(file); return sent; } else if (drop && sent >= 0) { printf("sendpage loop error expected: %i\n", sent); - close(fp); + fclose(file); return -EIO; } @@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, s->bytes_sent += sent; } clock_gettime(CLOCK_MONOTONIC, &s->end); - close(fp); + fclose(file); return 0; } diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index c5ca669feb2b..e19ce940cd6a 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -369,7 +369,7 @@ static void *dummy_thread_fn(void *arg) static int test_cgcore_proc_migration(const char *root) { int ret = KSFT_FAIL; - int t, c_threads, n_threads = 13; + int t, c_threads = 0, n_threads = 13; char *src = NULL, *dst = NULL; pthread_t threads[n_threads]; diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index cd1f5b3a7774..d6e106fbce11 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -2,7 +2,7 @@ all: TEST_PROGS := ftracetest -TEST_FILES := test.d 
+TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* include ../lib.mk diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index a09f57061902..4660128a545e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -5,3 +5,4 @@ futex_wait_private_mapped_file futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock +futex_wait_multiple diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index 30996306cabc..75f9fface11f 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -14,7 +14,8 @@ TEST_GEN_FILES := \ futex_requeue_pi_signal_restart \ futex_requeue_pi_mismatched_ops \ futex_wait_uninitialized_heap \ - futex_wait_private_mapped_file + futex_wait_private_mapped_file \ + futex_wait_multiple TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_wait_multiple.c b/tools/testing/selftests/futex/functional/futex_wait_multiple.c new file mode 100644 index 000000000000..b48422e79f42 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_multiple.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/****************************************************************************** + * + * Copyright © Collabora, Ltd., 2019 + * + * DESCRIPTION + * Test basic semantics of FUTEX_WAIT_MULTIPLE + * + * AUTHOR + * Gabriel Krisman Bertazi + * + * HISTORY + * 2019-Dec-13: Initial version by Krisman + * + *****************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include "futextest.h" +#include "logging.h" + +#define TEST_NAME "futex-wait-multiple" +#define timeout_ns 100000 +#define MAX_COUNT 128 +#define WAKE_WAIT_US 3000000 + +int ret = RET_PASS; +char *progname; +futex_t f[MAX_COUNT] = {0}; +struct futex_wait_block fwb[MAX_COUNT]; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void test_count_overflow(void) +{ + futex_t f = FUTEX_INITIALIZER; + struct futex_wait_block fwb[MAX_COUNT+1]; + int res, i; + + ksft_print_msg("%s: Test a too big number of futexes\n", progname); + + for (i = 0; i < MAX_COUNT+1; i++) { + fwb[i].uaddr = &f; + fwb[i].val = f; + fwb[i].bitset = 0; + } + + res = futex_wait_multiple(fwb, MAX_COUNT+1, NULL, FUTEX_PRIVATE_FLAG); + +#ifdef __ILP32__ + if (res != -1 || errno != ENOSYS) { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? errno : res); + ret = RET_FAIL; + } else { + ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); + } +#else + if (res != -1 || errno != EINVAL) { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? errno : res); + ret = RET_FAIL; + } else { + ksft_test_result_pass("futex_wait_multiple count overflow succeed\n"); + } + +#endif /* __ILP32__ */ +} + +void *waiterfn(void *arg) +{ + int res; + + res = futex_wait_multiple(fwb, MAX_COUNT, NULL, FUTEX_PRIVATE_FLAG); + +#ifdef __ILP32__ + if (res != -1 || errno != ENOSYS) { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? 
errno : res); + ret = RET_FAIL; + } else { + ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); + } +#else + if (res < 0) + ksft_print_msg("waiter failed %d\n", res); + + info("futex_wait_multiple: Got hint futex %d was freed\n", res); +#endif /* __ILP32__ */ + + return NULL; +} + +void test_fwb_wakeup(void) +{ + int res, i; + pthread_t waiter; + + ksft_print_msg("%s: Test wake up in a list of futex\n", progname); + + for (i = 0; i < MAX_COUNT; i++) { + fwb[i].uaddr = &f[i]; + fwb[i].val = f[i]; + fwb[i].bitset = 0xffffffff; + } + + res = pthread_create(&waiter, NULL, waiterfn, NULL); + if (res) { + ksft_test_result_fail("Creating waiting thread failed"); + ksft_exit_fail(); + } + + usleep(WAKE_WAIT_US); + res = futex_wake(&(f[MAX_COUNT-1]), 1, FUTEX_PRIVATE_FLAG); + if (res != 1) { + ksft_test_result_fail("Failed to wake thread res=%d\n", res); + ksft_exit_fail(); + } + + pthread_join(waiter, NULL); + ksft_test_result_pass("%s succeed\n", __func__); +} + +int main(int argc, char *argv[]) +{ + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + progname = basename(argv[0]); + + ksft_print_header(); + ksft_set_plan(2); + + test_count_overflow(); + +#ifdef __ILP32__ + // if it's a 32x binary, there's no futex to wakeup + ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); +#else + test_fwb_wakeup(); +#endif /* __ILP32__ */ + + ksft_print_cnts(); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index ee55e6d389a3..2a63e1c2cfb6 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -11,6 +11,7 @@ * * HISTORY * 2009-Nov-6: Initial version by Darren Hart + * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman * *****************************************************************************/ @@ -41,6 +42,8 @@ int main(int argc, char *argv[]) { futex_t f1 = FUTEX_INITIALIZER; struct timespec to; + time_t secs; + struct futex_wait_block fwb = {&f1, f1, 0}; int res, ret = RET_PASS; int c; @@ -65,7 +68,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2); ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); @@ -79,8 +82,39 @@ int main(int argc, char *argv[]) if (!res || errno != ETIMEDOUT) { fail("futex_wait returned %d\n", ret < 0 ? errno : ret); ret = RET_FAIL; + } else + ksft_test_result_pass("futex_wait timeout succeeds\n"); + + info("Calling futex_wait_multiple on f1: %u @ %p\n", f1, &f1); + + /* Setup absolute time */ + ret = clock_gettime(CLOCK_REALTIME, &to); + secs = (to.tv_nsec + timeout_ns) / 1000000000; + to.tv_nsec = ((int64_t)to.tv_nsec + timeout_ns) % 1000000000; + to.tv_sec += secs; + info("to.tv_sec = %ld\n", to.tv_sec); + info("to.tv_nsec = %ld\n", to.tv_nsec); + + res = futex_wait_multiple(&fwb, 1, &to, + FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME); + +#ifdef __ILP32__ + if (res == -1 && errno == ENOSYS) { + ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); + } else { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? 
errno : res); + ret = RET_FAIL; } +#else + if (!res || errno != ETIMEDOUT) { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? errno : res); + ret = RET_FAIL; + } else + ksft_test_result_pass("futex_wait_multiple timeout succeeds\n"); +#endif /* __ILP32__ */ - print_result(TEST_NAME, ret); + ksft_print_cnts(); return ret; } diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 0ae390ff8164..bcbac042992d 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -12,6 +12,7 @@ * * HISTORY * 2009-Nov-14: Initial version by Gowrishankar + * 2019-Dec-13: Add WAIT_MULTIPLE test by Krisman * *****************************************************************************/ @@ -40,6 +41,7 @@ int main(int argc, char *argv[]) { struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; futex_t f1 = FUTEX_INITIALIZER; + struct futex_wait_block fwb = {&f1, f1+1, 0}; int res, ret = RET_PASS; int c; @@ -61,7 +63,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2); ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", basename(argv[0])); @@ -71,8 +73,30 @@ int main(int argc, char *argv[]) fail("futex_wait returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); ret = RET_FAIL; + } else + ksft_test_result_pass("futex_wait wouldblock succeeds\n"); + + info("Calling futex_wait_multiple on f1: %u @ %p with val=%u\n", + f1, &f1, f1+1); + res = futex_wait_multiple(&fwb, 1, NULL, FUTEX_PRIVATE_FLAG); + +#ifdef __ILP32__ + if (res != -1 || errno != ENOSYS) { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? errno : res); + ret = RET_FAIL; + } else { + ksft_test_result_skip("futex_wait_multiple not supported at x32\n"); + } +#else + if (!res || errno != EWOULDBLOCK) { + ksft_test_result_fail("futex_wait_multiple returned %d\n", + res < 0 ? 
errno : res); + ret = RET_FAIL; } + ksft_test_result_pass("futex_wait_multiple wouldblock succeeds\n"); +#endif /* __ILP32__ */ - print_result(TEST_NAME, ret); + ksft_print_cnts(); return ret; } diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 1acb6ace1680..a8be94f28ff7 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -73,3 +73,6 @@ echo echo ./futex_wait_uninitialized_heap $COLOR ./futex_wait_private_mapped_file $COLOR + +echo +./futex_wait_multiple $COLOR diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index ddbcfc9b7bac..bb103bef4557 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -38,6 +38,14 @@ typedef volatile u_int32_t futex_t; #ifndef FUTEX_CMP_REQUEUE_PI #define FUTEX_CMP_REQUEUE_PI 12 #endif +#ifndef FUTEX_WAIT_MULTIPLE +#define FUTEX_WAIT_MULTIPLE 13 +struct futex_wait_block { + futex_t *uaddr; + futex_t val; + __u32 bitset; +}; +#endif #ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) @@ -80,6 +88,20 @@ futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); } +/** + * futex_wait_multiple() - block on several futexes with optional timeout + * @fwb: wait block user space address + * @count: number of entities at fwb + * @timeout: absolute timeout + */ +static inline int +futex_wait_multiple(struct futex_wait_block *fwb, int count, + struct timespec *timeout, int opflags) +{ + return futex(fwb, FUTEX_WAIT_MULTIPLE, count, timeout, NULL, 0, + opflags); +} + /** * futex_wake() - wake one or more tasks blocked on uaddr * @nr_wake: wake up to this many tasks diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index a8d20cbb711c..e84d901f8567 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -91,7 +91,7 @@ run_one() run_many() { echo "TAP version 13" - DIR=$(basename "$PWD") + DIR="${PWD#${BASE_DIR}/}" test_num=0 total=$(echo "$@" | wc -w) echo "1..$total" diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 1c8a1963d03f..3ed0134a764d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -83,17 +83,20 @@ else $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif +define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) +endef + define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ - fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ - fi + $(eval 
INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE) endef install: all
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile index 3876d8d62494..1acc9e1fa3fb 100644 --- a/tools/testing/selftests/livepatch/Makefile +++ b/tools/testing/selftests/livepatch/Makefile @@ -8,4 +8,6 @@ TEST_PROGS := \ test-state.sh \ test-ftrace.sh +TEST_FILES := settings + include ../lib.mk
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 6dd403103800..b7616704b55e 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -910,6 +910,12 @@ ipv6_rt_replace_mpath() check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024" log_test $? 0 "Multipath with single path via multipath attribute" + # multipath with dev-only + add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1" + check_route6 "2001:db8:104::/64 dev veth1 metric 1024" + log_test $? 0 "Multipath with dev-only" + # route replace fails - invalid nexthop 1 add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3" @@ -1035,6 +1041,26 @@ ipv6_addr_metric_test() fi log_test $rc 0 "Prefix route with metric on link up" + # verify peer metric added correctly + set -e + run_cmd "$IP -6 addr flush dev dummy2" + run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260" + set +e + + check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260" + log_test $? 0 "Set metric with peer route on local side" + check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260" + log_test $? 0 "Set metric with peer route on peer side" + + set -e + run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261" + set +e + + check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261" + log_test $? 0 "Modify metric and peer address on local side" + check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261" + log_test $? 0 "Modify metric and peer address on peer side" + $IP li del dummy1 $IP li del dummy2 cleanup @@ -1451,13 +1477,20 @@ ipv4_addr_metric_test() run_cmd "$IP addr flush dev dummy2" run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260" - run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261" rc=$? if [ $rc -eq 0 ]; then - check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" + check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260" + rc=$? + fi + log_test $rc 0 "Set metric of address with peer route" + + run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" + rc=$?
fi - log_test $rc 0 "Modify metric of address with peer route" + log_test $rc 0 "Modify metric and peer address for peer route" $IP li del dummy1 $IP li del dummy2
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index e6fd7a18c655..0266443601bc 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -63,22 +63,23 @@ test_span_gre_mac() { local tundev=$1; shift local direction=$1; shift - local prot=$1; shift local what=$1; shift - local swp3mac=$(mac_get $swp3) - local h3mac=$(mac_get $h3) + case "$direction" in + ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2) + ;; + egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1) + ;; + esac RET=0 mirror_install $swp1 $direction $tundev "matchall $tcflags" - tc filter add dev $h3 ingress pref 77 prot $prot \ - flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \ - action pass + icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 - tc filter del dev $h3 ingress pref 77 + icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction log_test "$direction $what: envelope MAC ($tcflags)" @@ -120,14 +121,14 @@ test_ip6gretap() test_gretap_mac() { - test_span_gre_mac gt4 ingress ip "mirror to gretap" - test_span_gre_mac gt4 egress ip "mirror to gretap" + test_span_gre_mac gt4 ingress "mirror to gretap" + test_span_gre_mac gt4 egress "mirror to gretap" } test_ip6gretap_mac() { - test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap" - test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap" + test_span_gre_mac gt6 ingress "mirror to ip6gretap" + test_span_gre_mac gt6 egress "mirror to ip6gretap" } test_all()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index bb10e33690b2..ce6bea9675c0 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -516,9 +516,9 @@ test_tos() RET=0 tc filter add dev v1 egress pref 77 prot ip \ - flower ip_tos 0x40 action pass - vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 - vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + flower ip_tos 0x14 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x14" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x18" v1 egress 77 0 tc filter del dev v1 egress pref 77 prot ip log_test "VXLAN: envelope TOS inheritance"
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 34df4c8882af..383bac05ac32 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -12,7 +12,11 @@ #include #include #include +#include <inttypes.h> #include +#include <linux/errqueue.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> #include #include #include @@ -28,7 +32,7 @@ static int cfg_clockid = CLOCK_TAI; static bool cfg_do_ipv4; static bool cfg_do_ipv6; static uint16_t cfg_port = 8000; -static int cfg_variance_us = 2000; +static int cfg_variance_us = 4000; static uint64_t glob_tstart; @@ -43,6 +47,9 @@ static struct timed_send cfg_in[MAX_NUM_PKT]; static struct timed_send cfg_out[MAX_NUM_PKT]; static int cfg_num_pkt; +static int cfg_errq_level; +static int cfg_errq_type; + static uint64_t gettime_ns(void) { struct timespec ts; @@ -90,13 +97,15 @@ static void do_send_one(int fdt, struct timed_send
*ts) } -static void do_recv_one(int fdr, struct timed_send *ts) +static bool do_recv_one(int fdr, struct timed_send *ts) { int64_t tstop, texpect; char rbuf[2]; int ret; ret = recv(fdr, rbuf, sizeof(rbuf), 0); + if (ret == -1 && errno == EAGAIN) + return true; if (ret == -1) error(1, errno, "read"); if (ret != 1) @@ -113,6 +122,8 @@ static void do_recv_one(int fdr, struct timed_send *ts) if (labs(tstop - texpect) > cfg_variance_us) error(1, 0, "exceeds variance (%d us)", cfg_variance_us); + + return false; } static void do_recv_verify_empty(int fdr) @@ -125,12 +136,70 @@ static void do_recv_verify_empty(int fdr) error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); } +static void do_recv_errqueue_timeout(int fdt) +{ + char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; + char data[sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + 1]; + struct sock_extended_err *err; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + int64_t tstamp = 0; + int ret; + + iov.iov_base = data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (1) { + ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != MSG_ERRQUEUE) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + + cm = CMSG_FIRSTHDR(&msg); + if (cm->cmsg_level != cfg_errq_level || + cm->cmsg_type != cfg_errq_type) + error(1, 0, "errqueue: type 0x%x.0x%x\n", + cm->cmsg_level, cm->cmsg_type); + + err = (struct sock_extended_err *)CMSG_DATA(cm); + if (err->ee_origin != SO_EE_ORIGIN_TXTIME) + error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin); + if (err->ee_code != ECANCELED) + error(1, 0, "errqueue: code 0x%x\n", err->ee_code); + + tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; + tstamp -= (int64_t) glob_tstart; + tstamp /= 1000 * 1000; + fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n", + data[ret - 1], tstamp); + + msg.msg_flags = 0; + msg.msg_controllen = sizeof(control); + } + + error(1, 0, "recv: timeout"); +} + static void setsockopt_txtime(int fd) { struct sock_txtime so_txtime_val = { .clockid = cfg_clockid }; struct sock_txtime so_txtime_val_read = { 0 }; socklen_t vallen = sizeof(so_txtime_val); + so_txtime_val.flags = SOF_TXTIME_REPORT_ERRORS; + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, &so_txtime_val, sizeof(so_txtime_val))) error(1, errno, "setsockopt txtime"); @@ -194,7 +263,8 @@ static void do_test(struct sockaddr *addr, socklen_t alen) for (i = 0; i < cfg_num_pkt; i++) do_send_one(fdt, &cfg_in[i]); for (i = 0; i < cfg_num_pkt; i++) - do_recv_one(fdr, &cfg_out[i]); + if (do_recv_one(fdr, &cfg_out[i])) + do_recv_errqueue_timeout(fdt); do_recv_verify_empty(fdr); @@ -280,6 +350,10 @@ int main(int argc, char **argv) addr6.sin6_family = AF_INET6; addr6.sin6_port = htons(cfg_port); addr6.sin6_addr = in6addr_loopback; + + cfg_errq_level = SOL_IPV6; + cfg_errq_type = IPV6_RECVERR; + do_test((void *)&addr6, sizeof(addr6)); } @@ -289,6 +363,10 @@ int main(int argc, char **argv) addr4.sin_family = AF_INET; addr4.sin_port = htons(cfg_port); addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + cfg_errq_level = SOL_IP; + cfg_errq_type = IP_RECVERR; + do_test((void *)&addr4, sizeof(addr4)); } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 5aa519328a5b..3f7800eaecb1 100755 --- 
a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -5,7 +5,12 @@ # Run in network namespace if [[ $# -eq 0 ]]; then - ./in_netns.sh $0 __subprocess + if ! ./in_netns.sh $0 __subprocess; then + # test is time sensitive, can be flaky + echo "test failed: retry once" + ./in_netns.sh $0 __subprocess + fi + exit $? fi @@ -18,7 +23,7 @@ tc qdisc add dev lo root fq ./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 ./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 -if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then +if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then ! ./so_txtime -4 -6 -c tai a,-1 a,-1 ! ./so_txtime -4 -6 -c tai a,0 a,0 ./so_txtime -4 -6 -c tai a,10 a,10
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 8d069490e17b..161facf28d6a 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -2,3 +2,4 @@ pidfd_open_test pidfd_poll_test pidfd_test pidfd_wait +pidfd_fdinfo_test
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh index 26112ab5cdf4..f52ed92b53e7 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -53,9 +53,13 @@ eeh_one_dev() { # is a no-op. echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check - # Enforce a 30s timeout for recovery. Even the IPR, which is infamously - # slow to reset, should recover within 30s. - max_wait=30 + # Default to a 60s timeout when waiting for a device to recover. This + # is an arbitrary default which can be overridden by setting the + # EEH_MAX_WAIT environment variable when required. + + # The current record holder for longest recovery time is: + # "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds + max_wait=${EEH_MAX_WAIT:=60} for i in `seq 0 ${max_wait}` ; do if pe_ok $dev ; then
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index d6469535630a..2af9d39a9716 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -4,7 +4,7 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) CLANG_FLAGS += -no-integrated-as endif -CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ \ +CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \ $(CLANG_FLAGS) LDLIBS += -lpthread @@ -19,6 +19,8 @@ TEST_GEN_PROGS_EXTENDED = librseq.so TEST_PROGS = run_param_test.sh +TEST_FILES := settings + include ../lib.mk $(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index de9c8566672a..90fa1a346908 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -6,4 +6,6 @@ TEST_GEN_PROGS = rtctest TEST_GEN_PROGS_EXTENDED = setdate +TEST_FILES := settings + include ../lib.mk
diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c index 2ad45b944355..2980b1a63366 100644 --- a/tools/testing/selftests/size/get_size.c +++ b/tools/testing/selftests/size/get_size.c @@ -11,23 +11,35 @@ * own execution. It also attempts to have as few dependencies * on kernel features as possible. * - * It should be statically linked, with startup libs avoided.
- * It uses no library calls, and only the following 3 syscalls: + * It should be statically linked, with startup libs avoided. It uses + * no library calls except the syscall() function for the following 3 + * syscalls: * sysinfo(), write(), and _exit() * * For output, it avoids printf (which in some C libraries * has large external dependencies) by implementing its own * number output and print routines, and using __builtin_strlen() + * + * The test may crash if any of the above syscalls fails because in some + * libc implementations (e.g. the GNU C Library) errno is saved in + * thread-local storage, which does not get initialized due to avoiding + * startup libs. */ #include <sys/sysinfo.h> #include <unistd.h> +#include <sys/syscall.h> #define STDOUT_FILENO 1 static int print(const char *s) { - return write(STDOUT_FILENO, s, __builtin_strlen(s)); + size_t len = 0; + + while (s[len] != '\0') + len++; + + return syscall(SYS_write, STDOUT_FILENO, s, len); } static inline char *num_to_str(unsigned long num, char *buf, int len) @@ -79,12 +91,12 @@ void _start(void) print("TAP version 13\n"); print("# Testing system size.\n"); - ccode = sysinfo(&info); + ccode = syscall(SYS_sysinfo, &info); if (ccode < 0) { print("not ok 1"); print(test_name); print(" ---\n reason: \"could not get sysinfo\"\n ...\n"); - _exit(ccode); + syscall(SYS_exit, ccode); } print("ok 1"); print(test_name); @@ -100,5 +112,5 @@ void _start(void) print(" ...\n"); print("1..1\n"); - _exit(0); + syscall(SYS_exit, 0); }
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py index e98c36750fae..d34fe06268d2 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -54,7 +54,7 @@ def _make(self, target): shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=ENVIR) + env=os.environ.copy()) (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0:
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index 2e361cea63bc..98a20faf3198 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -6,6 +6,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -25,6 +28,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -44,6 +50,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -63,6 +72,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -82,6 +94,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -101,6 +116,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -120,6 +138,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -139,6 +160,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -158,6 +182,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -177,6 +204,9 @@
"filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -196,6 +226,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -215,6 +248,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -234,6 +270,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -253,6 +292,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -272,6 +314,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -291,6 +336,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], @@ -310,6 +358,9 @@ "filter", "basic" ], + "plugins": { + "requires": "nsPlugin" + }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 8155c2ea7ccb..b630c7b5950a 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -1,8 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +self.flags = flags -python -m unittest -v tpm2_tests.SmokeTest -python -m unittest -v tpm2_tests.AsyncTest +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + + +if [ -f /dev/tpm0 ] ; then + python -m unittest -v tpm2_tests.SmokeTest + python -m unittest -v tpm2_tests.AsyncTest +else + exit $ksft_skip +fi CLEAR_CMD=$(which tpm2_clear) if [ -n $CLEAR_CMD ]; then diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh index a6f5e346635e..180b469c53b4 100755 --- a/tools/testing/selftests/tpm2/test_space.sh +++ b/tools/testing/selftests/tpm2/test_space.sh @@ -1,4 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) -python -m unittest -v tpm2_tests.SpaceTest +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if [ -f /dev/tpmrm0 ] ; then + python -m unittest -v tpm2_tests.SpaceTest +else + exit $ksft_skip +fi diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh new file mode 100755 index 000000000000..138d46b3f330 --- /dev/null +++ b/tools/testing/selftests/wireguard/netns.sh @@ -0,0 +1,550 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+# +# This script tests the below topology: +# +# β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +# β”‚ $ns1 namespace β”‚ β”‚ $ns0 namespace β”‚ β”‚ $ns2 namespace β”‚ +# β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +# β”‚β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”β”‚ +# β”‚β”‚ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 β”‚β”‚ +# β”‚β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”‚ +# β”‚β”‚192.168.241.1/24 β”‚β”‚ β”‚ β”‚(ns1) (ns2) β”‚ β”‚ β”‚β”‚192.168.241.2/24 β”‚β”‚ +# β”‚β”‚fd00::1/24 β”‚β”‚ β”‚ β”‚127.0.0.1:1 127.0.0.1:2β”‚ β”‚ β”‚β”‚fd00::2/24 β”‚β”‚ +# β”‚β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜β”‚ β”‚ β”‚[::]:1 [::]:2 β”‚ β”‚ β”‚β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜β”‚ +# β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +# β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +# +# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the +# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 +# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further +# details on how this is accomplished. 
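The property the header comment relies on is worth a standalone illustration: a WireGuard device's UDP socket is bound in the namespace where the interface was created, and it stays there when the interface is moved elsewhere. A minimal sketch of that pattern, separate from the test script and using hypothetical names (namespaces demo0/demo1, interface wgdemo), assuming root and the ip(8)/wg(8) tools:

    # Create the device in demo0, then move it to demo1. Encrypted UDP
    # traffic keeps flowing through demo0's network, while the cleartext
    # side of wgdemo is only reachable from demo1.
    ip netns add demo0
    ip netns add demo1
    ip -n demo0 link add wgdemo type wireguard    # UDP socket anchored in demo0
    ip -n demo0 link set wgdemo netns demo1       # device itself now lives in demo1
    ip -n demo1 addr add 192.0.2.1/24 dev wgdemo
    ip netns exec demo1 wg set wgdemo private-key <(wg genkey) listen-port 51820
    ip -n demo1 link set wgdemo up
    # cleanup
    ip netns del demo1
    ip netns del demo0

This is why $ns0's loopback device can serve as the "physical" network for both peers below: the sockets of both wg0 interfaces live in $ns0 even though the interfaces themselves sit in $ns1 and $ns2.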
+set -e + +exec 3>&1 +export LANG=C +export WG_HIDE_KEYS=never +netns0="wg-test-$$-0" +netns1="wg-test-$$-1" +netns2="wg-test-$$-2" +pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } +pp() { pretty "" "$*"; "$@"; } +maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } +n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } +n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } +n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } +ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } +ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } +ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } +sleep() { read -t "$1" -N 1 || true; } +waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } + +cleanup() { + set +e + exec 2>/dev/null + printf "$orig_message_cost" > /proc/sys/net/core/message_cost + ip0 link del dev wg0 + ip1 link del dev wg0 + ip2 link del dev wg0 + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" + [[ -n $to_kill ]] && kill $to_kill + pp ip netns del $netns1 + pp ip netns del $netns2 + pp ip netns del $netns0 + exit +} + +orig_message_cost="$(< /proc/sys/net/core/message_cost)" +trap cleanup EXIT +printf 0 > /proc/sys/net/core/message_cost + +ip netns del $netns0 2>/dev/null || true +ip netns del $netns1 2>/dev/null || true +ip netns del $netns2 2>/dev/null || true +pp ip netns add $netns0 +pp ip netns add $netns1 +pp ip netns add $netns2 +ip0 link set up dev lo + +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns1 +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns2 +key1="$(pp wg genkey)" +key2="$(pp wg genkey)" +key3="$(pp wg genkey)" +pub1="$(pp wg pubkey <<<"$key1")" +pub2="$(pp wg pubkey <<<"$key2")" +pub3="$(pp wg pubkey <<<"$key3")" +psk="$(pp wg genpsk)" +[[ -n $key1 && -n $key2 && -n $psk ]] + +configure_peers() { + ip1 addr add 192.168.241.1/24 dev wg0 + ip1 addr add fd00::1/24 dev wg0 + + ip2 addr add 192.168.241.2/24 dev wg0 + ip2 addr add fd00::2/24 dev wg0 + + n1 wg set wg0 \ + private-key <(echo "$key1") \ + listen-port 1 \ + peer "$pub2" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.2/32,fd00::2/128 + n2 wg set wg0 \ + private-key <(echo "$key2") \ + listen-port 2 \ + peer "$pub1" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.1/32,fd00::1/128 + + ip1 link set up dev wg0 + ip2 link set up dev wg0 +} +configure_peers + +tests() { + # Ping over IPv4 + n2 ping -c 10 -f -W 1 192.168.241.1 + n1 ping -c 10 -f -W 1 192.168.241.2 + + # Ping over IPv6 + n2 ping6 -c 10 -f -W 1 fd00::1 + n1 ping6 -c 10 -f -W 1 fd00::2 + + # TCP over IPv4 + n2 iperf3 -s -1 -B 192.168.241.2 & + waitiperf $netns2 $! + n1 iperf3 -Z -t 3 -c 192.168.241.2 + + # TCP over IPv6 + n1 iperf3 -s -1 -B fd00::1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -c fd00::1 + + # UDP over IPv4 + n1 iperf3 -s -1 -B 192.168.241.1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 + + # UDP over IPv6 + n2 iperf3 -s -1 -B fd00::2 & + waitiperf $netns2 $! 
+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 +} + +[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" +big_mtu=$(( 34816 - 1500 + $orig_mtu )) + +# Test using IPv4 as outer transport +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +# Before calling tests, we first make sure that the stats counters and timestamper are working +n2 ping -c 10 -f -W 1 192.168.241.1 +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ timestamp < <(n1 wg show wg0 latest-handshakes) +(( timestamp != 0 )) + +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv6 as outer transport +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n2 wg set wg0 peer "$pub1" endpoint [::1]:1 +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +# Test that route MTUs work with the padding +ip1 link set wg0 mtu 1300 +ip2 link set wg0 mtu 1300 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +n0 iptables -A INPUT -m length --length 1360 -j DROP +n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 +n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 +n2 ping -c 1 -W 1 -s 1269 192.168.241.1 +n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 +n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 +n0 iptables -F INPUT + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv4 that roaming works +ip0 -4 addr del 127.0.0.1/8 dev lo +ip0 -4 addr add 127.212.121.99/8 dev lo +n1 wg set wg0 listen-port 9999 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n1 ping6 -W 1 -c 1 fd00::2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] + +# Test using IPv6 that roaming works +n1 wg set wg0 listen-port 9998 +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] + +# Test that crypto-RP filter works +n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] +kill $ncat_pid +more_specific_key="$(pp wg genkey | pp wg pubkey)" +n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 +n2 wg set wg0 listen-port 9997 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +! 
read -r -N 1 -t 1 out <&4 || false +kill $ncat_pid +n1 wg set wg0 peer "$more_specific_key" remove +[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] + +# Test that we can change private keys keys and immediately handshake +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 private-key <(echo "$key3") +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove +n1 ping -W 1 -c 1 192.168.241.2 + +ip1 link del wg0 +ip2 link del wg0 + +# Test using NAT. We now change the topology to this: +# β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +# β”‚ $ns1 namespace β”‚ β”‚ $ns0 namespace β”‚ β”‚ $ns2 namespace β”‚ +# β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +# β”‚ β”Œβ”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β” β”‚ +# β”‚ β”‚ wg0 │─────────────│vethc│───────────┼────┼────│vethrcβ”‚ β”‚vethrs│──────────────┼─────┼──│veths│────────────│ wg0 β”‚ β”‚ +# β”‚ β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”β”‚ β”‚ β”œβ”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”œβ”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ β”‚ β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +# β”‚ β”‚192.168.241.1/24β”‚ β”‚192.168.1.100/24β”‚β”‚ β”‚ β”‚192.168.1.1/24 β”‚ β”‚10.0.0.1/24 β”‚ β”‚ β”‚ β”‚10.0.0.100/24 β”‚ β”‚192.168.241.2/24β”‚ β”‚ +# β”‚ β”‚fd00::1/24 β”‚ β”‚ β”‚β”‚ β”‚ β”‚ β”‚ β”‚SNAT:192.168.1.0/24β”‚ β”‚ β”‚ β”‚ β”‚ β”‚fd00::2/24 β”‚ β”‚ +# β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +# β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers + +ip0 link add vethrc type veth peer name vethc +ip0 link add vethrs type veth peer name veths +ip0 link set vethc netns $netns1 +ip0 link set veths netns $netns2 +ip0 link set vethrc up +ip0 link set vethrs up +ip0 addr add 192.168.1.1/24 dev vethrc +ip0 addr add 10.0.0.1/24 dev vethrs +ip1 addr add 192.168.1.100/24 dev vethc +ip1 link set vethc up +ip1 route add default via 192.168.1.1 +ip2 addr add 10.0.0.100/24 dev veths +ip2 link set 
veths up +waitiface $netns0 vethrc +waitiface $netns0 vethrs +waitiface $netns1 vethc +waitiface $netns2 veths + +n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' +n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 + +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 +n1 ping -W 1 -c 1 192.168.241.2 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). +pp sleep 3 +n2 ping -W 1 -c 1 192.168.241.1 +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 + +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. +ip1 -6 addr add fc00::9/96 dev vethc +ip1 -6 route add default via fc00::1 +ip2 -4 addr add 192.168.99.7/32 dev wg0 +ip2 -6 addr add abab::1111/128 dev wg0 +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 +ip1 -6 route add default dev wg0 table 51820 +ip1 -6 rule add not fwmark 51820 table 51820 +ip1 -6 rule add table main suppress_prefixlength 0 +ip1 -4 route add default dev wg0 table 51820 +ip1 -4 rule add not fwmark 51820 table 51820 +ip1 -4 rule add table main suppress_prefixlength 0 +# Flood the pings instead of sending just one, to trigger routing table reference counting bugs. +n1 ping -W 1 -c 100 -f 192.168.99.7 +n1 ping -W 1 -c 100 -f abab::1111 + +# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. +n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 +n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. +n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +ip0 -4 route add 192.168.241.1 via 10.0.0.100 +n2 wg set wg0 peer "$pub1" remove +[[ $(! 
n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] + +n0 iptables -t nat -F +n0 iptables -t filter -F +n2 iptables -t nat -F +ip0 link del vethrc +ip0 link del vethrs +ip1 link del wg0 +ip2 link del wg0 + +# Test that saddr routing is sticky but not too sticky, changing to this topology: +# β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +# β”‚ $ns1 namespace β”‚ β”‚ $ns2 namespace β”‚ +# β”‚ β”‚ β”‚ β”‚ +# β”‚ β”Œβ”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β” β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β” β”‚ +# β”‚ β”‚ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 β”‚ β”‚ +# β”‚ β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”β”‚ β”‚ β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”œβ”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +# β”‚ β”‚192.168.241.1/24β”‚ β”‚10.0.0.1/24 β”‚β”‚ β”‚ β”‚10.0.0.2/24 β”‚ β”‚192.168.241.2/24β”‚ β”‚ +# β”‚ β”‚fd00::1/24 β”‚ β”‚fd00:aa::1/96 β”‚β”‚ β”‚ β”‚fd00:aa::2/96 β”‚ β”‚fd00::2/24 β”‚ β”‚ +# β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜β”‚ β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +# β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' +n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' + +# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip2 addr add fd00:aa::2/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add 10.0.0.10/24 dev veth1 +ip1 addr del 10.0.0.1/24 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add fd00:aa::10/96 dev veth1 +ip1 addr del fd00:aa::1/96 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 + +# Now we show that we can successfully do reply to sender routing +ip1 link set veth1 down +ip2 link set veth2 down +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add 10.0.0.2/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip1 addr add fd00:aa::2/96 dev veth1 +ip2 addr add 10.0.0.3/24 dev veth2 +ip2 addr add fd00:aa::3/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n2 wg set wg0 peer "$pub1" 
endpoint 10.0.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] + +# What happens if the inbound destination address belongs to a different interface as the default route? +ip1 link add dummy0 type dummy +ip1 addr add 10.50.0.1/24 dev dummy0 +ip1 link set dummy0 up +ip2 route add 10.50.0.0/24 dev veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] + +ip1 link del dummy0 +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 route flush dev veth1 +ip2 route flush dev veth2 + +# Now we see what happens if another interface route takes precedence over an ongoing one +ip1 link add veth3 type veth peer name veth4 +ip1 link set veth4 netns $netns2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip1 addr add 10.0.0.3/24 dev veth3 +ip1 link set veth1 up +ip2 link set veth2 up +ip1 link set veth3 up +ip2 link set veth4 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +waitiface $netns1 veth3 +waitiface $netns2 veth4 +ip1 route flush dev veth1 +ip1 route flush dev veth3 +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] + +ip1 link del veth1 +ip1 link del veth3 +ip1 link del wg0 +ip2 link del wg0 + +# We test that Netlink/IPC is working properly by doing things that usually cause split responses +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) +for a in {1..255}; do + for b in {0..255}; do + config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +for ip in $(n0 wg show wg0 allowed-ips); do + ((++i)) +done +((i == 255*256*2+1)) +ip0 link del wg0 +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" ) +for a in {1..40}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) + for b in {1..52}; do + config+=( "AllowedIPs=$a.$b.0.0/16" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +while read -r line; do + j=0 + for ip in $line; do + ((++j)) + done + ((j == 53)) + ((++i)) +done < <(n0 wg show wg0 allowed-ips) +((i == 40)) +ip0 link del wg0 +ip0 link add wg0 type wireguard +config=( ) +for i in {1..29}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) +done +config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +n0 wg showconf wg0 > /dev/null +ip0 link del wg0 + +allowedips=( ) +for i in {1..197}; do + 
allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" +IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" +n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" +{ + read -r pub allowedips + [[ $pub == "$pub1" && $allowedips == "(none)" ]] + read -r pub allowedips + [[ $pub == "$pub2" ]] + i=0 + for _ in $allowedips; do + ((++i)) + done + ((i == 197)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + +! n0 wg show doesnotexist || false + +ip0 link add wg0 type wireguard +n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") +[[ $(n0 wg show wg0 private-key) == "$key1" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] +n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null +[[ $(n0 wg show wg0 private-key) == "(none)" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] +n0 wg set wg0 peer "$pub2" +n0 wg set wg0 private-key <(echo "$key2") +[[ $(n0 wg show wg0 public-key) == "$pub2" ]] +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 peer "$pub2" +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") +n0 wg set wg0 peer "$pub2" +[[ $(n0 wg show wg0 peers) == "$pub2" ]] +n0 wg set wg0 private-key <(echo "/${key1:1}") +[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0 +n0 wg set wg0 peer "$pub2" remove +low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) +n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } +[[ -z $(n0 wg show wg0 peers) ]] +ip0 link del wg0 + +declare -A objects +while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" +done < /dev/kmsg +alldeleted=1 +for object in "${!objects[@]}"; do + if [[ ${objects["$object"]} != *createddestroyed ]]; then + echo "Error: $object: merely ${objects["$object"]}" >&3 + alldeleted=0 + fi +done +[[ $alldeleted -eq 1 ]] +pretty "" "Objects that were created were also destroyed." diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore new file mode 100644 index 000000000000..415b542a9d59 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/.gitignore @@ -0,0 +1,2 @@ +build/ +distfiles/ diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile new file mode 100644 index 000000000000..28d477683e8a --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -0,0 +1,377 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
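One technique from the tail end of netns.sh above deserves isolating: pairing "created"/"destroyed" kernel log messages in a bash associative array to prove that every wg object that came into existence was also torn down. A simplified sketch of the same pattern, reading via dmesg rather than the script's non-blocking /dev/kmsg read loop, and assuming the same message format the script's regex matches (e.g. "wg0: Peer 3 created"):

    #!/bin/bash
    # Accumulate lifecycle events per object name.
    declare -A events
    while read -r line; do
        [[ $line =~ (wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+).*(created|destroyed) ]] || continue
        events["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
    done < <(dmesg)
    # An object whose history does not end in "createddestroyed" either
    # leaked (never destroyed) or was destroyed without a matching create.
    for obj in "${!events[@]}"; do
        [[ ${events[$obj]} == *createddestroyed ]] || echo "leak suspect: $obj -> ${events[$obj]}"
    done

The suffix match rather than an exact match is deliberate: an object that went through several complete create/destroy cycles still ends its history with "createddestroyed" and is correctly treated as clean.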
+ +PWD := $(shell pwd) + +CHOST := $(shell gcc -dumpmachine) +HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) +ifneq (,$(ARCH)) +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) +ifeq (,$(CBUILD)) +$(error The toolchain for $(ARCH) is not installed) +endif +else +CBUILD := $(CHOST) +ARCH := $(firstword $(subst -, ,$(CBUILD))) +endif + +# Set these from the environment to override +KERNEL_PATH ?= $(PWD)/../../../../.. +BUILD_PATH ?= $(PWD)/build/$(ARCH) +DISTFILES_PATH ?= $(PWD)/distfiles +NR_CPUS ?= 4 + +MIRROR := https://download.wireguard.com/qemu-test/distfiles/ + +default: qemu + +# variable name, tarball project name, version, tarball extension, default URI base +define tar_download = +$(1)_VERSION := $(3) +$(1)_NAME := $(2)-$$($(1)_VERSION) +$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4) +$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) +$(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) +endef + +define file_download = +$(DISTFILES_PATH)/$(1): + mkdir -p $(DISTFILES_PATH) + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' +endef + +$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) +$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) +$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) +$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) +$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) +$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64)) + +KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) + +export CFLAGS ?= -O3 -pipe +export LDFLAGS ?= +export CPPFLAGS := -I$(BUILD_PATH)/include + +ifeq ($(HOST_ARCH),$(ARCH)) +CROSS_COMPILE_FLAG := --host=$(CHOST) +CFLAGS += -march=native +STRIP := strip +else +$(info Cross compilation: building for $(CBUILD) using $(CHOST)) +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +export CROSS_COMPILE=$(CBUILD)- +STRIP := $(CBUILD)-strip +endif +ifeq ($(ARCH),aarch64) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine 
virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),aarch64_be) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),arm) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux +endif +else ifeq ($(ARCH),armeb) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. +LDFLAGS += -Wl,--be8 +endif +else ifeq ($(ARCH),x86_64) +QEMU_ARCH := x86_64 +KERNEL_ARCH := x86_64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu Skylake-Server -machine q35 +CFLAGS += -march=skylake-avx512 +endif +else ifeq ($(ARCH),i686) +QEMU_ARCH := i386 +KERNEL_ARCH := x86 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu coreduo -machine q35 +CFLAGS += -march=prescott +endif +else ifeq ($(ARCH),mips64) +QEMU_ARCH := mips64 +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EB +endif +else ifeq ($(ARCH),mips64el) +QEMU_ARCH := mips64el +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EL +endif +else ifeq ($(ARCH),mips) +QEMU_ARCH := mips +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EB +endif +else ifeq ($(ARCH),mipsel) +QEMU_ARCH := mipsel +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EL +endif +else ifeq ($(ARCH),powerpc64le) +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif +CFLAGS += -mcpu=powerpc64le -mlong-double-64 +else ifeq ($(ARCH),powerpc) +QEMU_ARCH := ppc +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage +ifeq 
($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 +else +QEMU_MACHINE := -machine ppce500 +endif +CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt +else ifeq ($(ARCH),m68k) +QEMU_ARCH := m68k +KERNEL_ARCH := m68k +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +else +QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +endif + +REAL_CC := $(CBUILD)-gcc +MUSL_CC := $(BUILD_PATH)/musl-gcc +export CC := $(MUSL_CC) +USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed + +build: $(KERNEL_BZIMAGE) +qemu: $(KERNEL_BZIMAGE) + rm -f $(BUILD_PATH)/result + timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ + -nodefaults \ + -nographic \ + -smp $(NR_CPUS) \ + $(QEMU_MACHINE) \ + -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ + -serial stdio \ + -serial file:$(BUILD_PATH)/result \ + -no-reboot \ + -monitor none \ + -kernel $< + grep -Fq success $(BUILD_PATH)/result + +$(BUILD_PATH)/init-cpio-spec.txt: + mkdir -p $(BUILD_PATH) + echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ + echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ + echo "dir /dev 755 0 0" >> $@ + echo "nod /dev/console 644 0 0 c 5 1" >> $@ + echo "dir /bin 755 0 0" >> $@ + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@ + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ + echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@ + echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ + echo "slink /bin/ping6 ping 777 0 0" >> $@ + echo "dir /lib 755 0 0" >> $@ + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ + echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + +$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config + mkdir -p $(KERNEL_BUILD_PATH) + cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config + printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config + cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig + cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config + $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) + +$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat 
$(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) + +$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install + touch $@ + +$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) + $(MAKE) -C $(MUSL_PATH) + $(STRIP) -s $@ + +$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so + $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + touch $@ + +$(MUSL_CC): $(MUSL_PATH)/lib/libc.so + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc + chmod +x $(BUILD_PATH)/musl-gcc + +$(IPERF_PATH)/.installed: $(IPERF_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i '1s/^/#include /' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h + sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* + touch $@ + +$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no + $(MAKE) -C $(IPERF_PATH) + $(STRIP) -s $@ + +$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg + $(STRIP) -s $@ + +$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) + mkdir -p $(BUILD_PATH) + $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(STRIP) -s $@ + +$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) + sed -i /atexit/d $(IPUTILS_PATH)/ping.c + cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS) + $(STRIP) -s $@ + +$(BASH_PATH)/.installed: $(BASH_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + $(MAKE) -C $(BASH_PATH) + $(STRIP) -s $@ + +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + touch $@ + +$(IPROUTE2_PATH)/ip/ip: | 
$(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $@ + +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS) + $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $@ + +$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure + touch $@ + +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include + $(MAKE) -C $(IPTABLES_PATH) + $(STRIP) -s $@ + +$(NMAP_PATH)/.installed: $(NMAP_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh + $(MAKE) -C $(NMAP_PATH)/libpcap + $(MAKE) -C $(NMAP_PATH)/ncat + $(STRIP) -s $@ + +clean: + rm -rf $(BUILD_PATH) + +distclean: clean + rm -rf $(DISTFILES_PATH) + +menuconfig: $(KERNEL_BUILD_PATH)/.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig + +.PHONY: qemu build clean distclean menuconfig +.DELETE_ON_ERROR: diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config new file mode 100644 index 000000000000..3d063bb247bb --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config new file mode 100644 index 000000000000..dbdc7e406a7b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -0,0 +1,6 @@ +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config new file mode 100644 index 000000000000..148f49905418 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config new file mode 100644 index 000000000000..bd76b07d00a2 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -0,0 +1,10 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y 
+CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config new file mode 100644 index 000000000000..a85025d7206e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config new file mode 100644 index 000000000000..62a15bdb877e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_M68KCLASSIC=y +CONFIG_M68040=y +CONFIG_MAC=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config new file mode 100644 index 000000000000..df71d6b95546 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -0,0 +1,11 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config new file mode 100644 index 000000000000..90c783f725c4 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config @@ -0,0 +1,14 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config new file mode 100644 index 000000000000..435b0b43e00c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config @@ -0,0 +1,15 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config new file mode 100644 index 000000000000..62bb50c4a85f --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -0,0 +1,12 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y 
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config new file mode 100644 index 000000000000..57957093b71b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -0,0 +1,10 @@ +CONFIG_PPC_QEMU_E500=y +CONFIG_FSL_SOC_BOOKE=y +CONFIG_PPC_85xx=y +CONFIG_PHYS_64BIT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_MATH_EMULATION=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config new file mode 100644 index 000000000000..990c510a9cfa --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config @@ -0,0 +1,12 @@ +CONFIG_PPC64=y +CONFIG_PPC_PSERIES=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_HVC_CONSOLE=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config new file mode 100644 index 000000000000..00a1ef4869d5 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config new file mode 100644 index 000000000000..5909e7ef2a5c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -0,0 +1,66 @@ +CONFIG_LOCALVERSION="-debug" +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_POINTER=y +CONFIG_STACK_VALIDATION=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_PAGE_EXTENSION=y +CONFIG_PAGE_POISONING=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 +CONFIG_SLUB_DEBUG_ON=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_HAVE_DEBUG_STACKOVERFLOW=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_HAVE_ARCH_KMEMCHECK=y +CONFIG_HAVE_ARCH_KASAN=y +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_UBSAN=y +CONFIG_UBSAN_SANITIZE_ALL=y +CONFIG_UBSAN_NO_ALIGNMENT=y +CONFIG_UBSAN_NULL=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_WQ_WATCHDOG=y +CONFIG_SCHED_DEBUG=y +CONFIG_SCHED_INFO=y +CONFIG_SCHEDSTATS=y +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_DEBUG_TIMEKEEPING=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_PREEMPT=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_PROVE_RCU=y +CONFIG_SPARSE_RCU_POINTER=y +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +CONFIG_RCU_TRACE=y +CONFIG_RCU_EQS_DEBUG=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DOUBLEFAULT=y +CONFIG_X86_DEBUG_FPU=y +CONFIG_DEBUG_SECTION_MISMATCH=y 
+CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c new file mode 100644 index 000000000000..90bc9813cadc --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__attribute__((noreturn)) static void poweroff(void) +{ + fflush(stdout); + fflush(stderr); + reboot(RB_AUTOBOOT); + sleep(30); + fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); + exit(1); +} + +static void panic(const char *what) +{ + fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); + poweroff(); +} + +#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") + +static void print_banner(void) +{ + struct utsname utsname; + int len; + + if (uname(&utsname) < 0) + panic("uname"); + + len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); + printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); +} + +static void seed_rng(void) +{ + int fd; + struct { + int entropy_count; + int buffer_size; + unsigned char buffer[256]; + } entropy = { + .entropy_count = sizeof(entropy.buffer) * 8, + .buffer_size = sizeof(entropy.buffer), + .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" 
+ }; + + if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) + panic("mknod(/dev/urandom)"); + fd = open("/dev/urandom", O_WRONLY); + if (fd < 0) + panic("open(urandom)"); + for (int i = 0; i < 256; ++i) { + if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) + panic("ioctl(urandom)"); + } + close(fd); +} + +static void mount_filesystems(void) +{ + pretty_message("[+] Mounting filesystems..."); + mkdir("/dev", 0755); + mkdir("/proc", 0755); + mkdir("/sys", 0755); + mkdir("/tmp", 0755); + mkdir("/run", 0755); + mkdir("/var", 0755); + if (mount("none", "/dev", "devtmpfs", 0, NULL)) + panic("devtmpfs mount"); + if (mount("none", "/proc", "proc", 0, NULL)) + panic("procfs mount"); + if (mount("none", "/sys", "sysfs", 0, NULL)) + panic("sysfs mount"); + if (mount("none", "/tmp", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/run", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) + ; /* Not a problem if it fails.*/ + if (symlink("/run", "/var/run")) + panic("run symlink"); + if (symlink("/proc/self/fd", "/dev/fd")) + panic("fd symlink"); +} + +static void enable_logging(void) +{ + int fd; + pretty_message("[+] Enabling logging..."); + fd = open("/proc/sys/kernel/printk", O_WRONLY); + if (fd >= 0) { + if (write(fd, "9\n", 2) != 2) + panic("write(printk)"); + close(fd); + } + fd = open("/proc/sys/debug/exception-trace", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(exception-trace)"); + close(fd); + } + fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(panic_on_warn)"); + close(fd); + } +} + +static void kmod_selftests(void) +{ + FILE *file; + char line[2048], *start, *pass; + bool success = true; + pretty_message("[+] Module self-tests:"); + file = fopen("/proc/kmsg", "r"); + if (!file) + panic("fopen(kmsg)"); + if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) + panic("fcntl(kmsg, nonblock)"); + while (fgets(line, sizeof(line), file)) { + start = strstr(line, "wireguard: "); + if (!start) + continue; + start += 11; + *strchrnul(start, '\n') = '\0'; + if (strstr(start, "www.wireguard.com")) + break; + pass = strstr(start, ": pass"); + if (!pass || pass[6] != '\0') { + success = false; + printf(" \x1b[31m* %s\x1b[0m\n", start); + } else + printf(" \x1b[32m* %s\x1b[0m\n", start); + } + fclose(file); + if (!success) { + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); + poweroff(); + } +} + +static void launch_tests(void) +{ + char cmdline[4096], *success_dev; + int status, fd; + pid_t pid; + + pretty_message("[+] Launching tests..."); + pid = fork(); + if (pid == -1) + panic("fork"); + else if (pid == 0) { + execl("/init.sh", "init", NULL); + panic("exec"); + } + if (waitpid(pid, &status, 0) < 0) + panic("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + pretty_message("[+] Tests successful! 
:-)"); + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + panic("open(/proc/cmdline)"); + if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) + panic("read(/proc/cmdline)"); + cmdline[sizeof(cmdline) - 1] = '\0'; + for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { + if (strncmp(success_dev, "wg.success=", 11)) + continue; + memcpy(success_dev + 11 - 5, "/dev/", 5); + success_dev += 11 - 5; + break; + } + if (!success_dev || !strlen(success_dev)) + panic("Unable to find success device"); + + fd = open(success_dev, O_WRONLY); + if (fd < 0) + panic("open(success_dev)"); + if (write(fd, "success\n", 8) != 8) + panic("write(success_dev)"); + close(fd); + } else { + const char *why = "unknown cause"; + int what = -1; + + if (WIFEXITED(status)) { + why = "exit code"; + what = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + why = "signal"; + what = WTERMSIG(status); + } + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what); + } +} + +static void ensure_console(void) +{ + for (unsigned int i = 0; i < 1000; ++i) { + int fd = open("/dev/console", O_RDWR); + if (fd < 0) { + usleep(50000); + continue; + } + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); + if (write(1, "\0\0\0\0\n", 5) == 5) + return; + } + panic("Unable to open console device"); +} + +static void clear_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Starting memory leak detection..."); + write(fd, "clear\n", 5); + close(fd); +} + +static void check_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Scanning for memory leaks..."); + sleep(2); /* Wait for any grace periods. 
*/ + write(fd, "scan\n", 5); + close(fd); + + fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); + if (fd < 0) + return; + if (sendfile(1, fd, NULL, 0x7ffff000) > 0) + panic("Memory leaks encountered"); + close(fd); +} + +int main(int argc, char *argv[]) +{ + seed_rng(); + ensure_console(); + print_banner(); + mount_filesystems(); + kmod_selftests(); + enable_logging(); + clear_leaks(); + launch_tests(); + check_leaks(); + poweroff(); + return 1; +} diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config new file mode 100644 index 000000000000..af9323a0b6e0 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -0,0 +1,88 @@ +CONFIG_LOCALVERSION="" +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NET_CORE=y +CONFIG_NET_IPIP=y +CONFIG_DUMMY=y +CONFIG_VETH=y +CONFIG_MULTIUSER=y +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_NAT_IPV4=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_TTY=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_SCRIPT=y +CONFIG_VDSO=y +CONFIG_VIRTUALIZATION=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PRINTK=y +CONFIG_KALLSYMS=y +CONFIG_BUG=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +CONFIG_JUMP_LABEL=y +CONFIG_EMBEDDED=n +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_SHMEM=y +CONFIG_SLUB=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_SMP=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +CONFIG_NUMA=y +CONFIG_PREEMPT=y +CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_HZ_PERIODIC=n +CONFIG_HIGH_RES_TIMERS=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_ARCH_RANDOM=y +CONFIG_FILE_LOCKING=y +CONFIG_POSIX_TIMERS=y +CONFIG_DEVTMPFS=y +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_PRINTK_TIME=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_KERNEL_GZIP=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_WQ_WATCHDOG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y +CONFIG_PANIC_TIMEOUT=-1 +CONFIG_STACKTRACE=y +CONFIG_EARLY_PRINTK=y +CONFIG_GDB_SCRIPTS=y +CONFIG_WIREGUARD=y +CONFIG_WIREGUARD_DEBUG=y diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c index d595d72693fb..ed4dc8c14269 100644 --- a/tools/usb/usbip/src/usbip_network.c +++ b/tools/usb/usbip/src/usbip_network.c @@ -50,39 +50,39 @@ void usbip_setup_port_number(char *arg) info("using port %d (\"%s\")", usbip_port, usbip_port_string); } -void usbip_net_pack_uint32_t(int pack, uint32_t *num) +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num) { uint32_t i; if (pack) - i = htonl(*num); + i = htonl(num); else - i = ntohl(*num); + i = ntohl(num); - *num = i; + return i; } -void usbip_net_pack_uint16_t(int pack, uint16_t *num) +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num) { uint16_t i; if (pack) - i = htons(*num); + i = htons(num); else - i = ntohs(*num); + i = ntohs(num); - *num = i; + return i; } void 
usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev) { - usbip_net_pack_uint32_t(pack, &udev->busnum); - usbip_net_pack_uint32_t(pack, &udev->devnum); - usbip_net_pack_uint32_t(pack, &udev->speed); + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum); + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum); + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed); - usbip_net_pack_uint16_t(pack, &udev->idVendor); - usbip_net_pack_uint16_t(pack, &udev->idProduct); - usbip_net_pack_uint16_t(pack, &udev->bcdDevice); + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor); + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct); + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice); } void usbip_net_pack_usb_interface(int pack __attribute__((unused)), @@ -129,6 +129,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen) return usbip_net_xmit(sockfd, buff, bufflen, 1); } +static inline void usbip_net_pack_op_common(int pack, + struct op_common *op_common) +{ + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version); + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code); + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status); +} + int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) { struct op_common op_common; @@ -140,7 +148,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) op_common.code = code; op_common.status = status; - PACK_OP_COMMON(1, &op_common); + usbip_net_pack_op_common(1, &op_common); rc = usbip_net_send(sockfd, &op_common, sizeof(op_common)); if (rc < 0) { @@ -164,7 +172,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code, int *status) goto err; } - PACK_OP_COMMON(0, &op_common); + usbip_net_pack_op_common(0, &op_common); if (op_common.version != USBIP_VERSION) { err("USBIP Kernel and tool version mismatch: %d %d:", diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h index 555215eae43e..83b4c5344f72 100644 --- a/tools/usb/usbip/src/usbip_network.h +++ b/tools/usb/usbip/src/usbip_network.h @@ -32,12 +32,6 @@ struct op_common { } __attribute__((packed)); -#define PACK_OP_COMMON(pack, op_common) do {\ - usbip_net_pack_uint16_t(pack, &(op_common)->version);\ - usbip_net_pack_uint16_t(pack, &(op_common)->code);\ - usbip_net_pack_uint32_t(pack, &(op_common)->status);\ -} while (0) - /* ---------------------------------------------------------------------- */ /* Dummy Code */ #define OP_UNSPEC 0x00 @@ -163,11 +157,11 @@ struct op_devlist_reply_extra { } while (0) #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\ - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\ + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\ } while (0) -void usbip_net_pack_uint32_t(int pack, uint32_t *num); -void usbip_net_pack_uint16_t(int pack, uint16_t *num); +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num); +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num); void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev); void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf); diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index c4c57ba99e90..0a356aa91aa1 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -10,10 +10,15 @@ * Author: Christoffer Dall */ +#include #include #include #include +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + /* * Table taken from ARMv8 
ARM DDI0487B-B, table G1-10. */ @@ -28,25 +33,115 @@ static const u8 return_offsets[8][2] = { [7] = { 4, 4 }, /* FIQ, unused */ }; +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) { - unsigned long cpsr; - unsigned long new_spsr_value = *vcpu_cpsr(vcpu); - bool is_thumb = (new_spsr_value & PSR_AA32_T_BIT); + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; u32 sctlr = vcpu_cp15(vcpu, 
c1_SCTLR); - cpsr = mode | PSR_AA32_I_BIT; - - if (sctlr & (1 << 30)) - cpsr |= PSR_AA32_T_BIT; - if (sctlr & (1 << 25)) - cpsr |= PSR_AA32_E_BIT; - - *vcpu_cpsr(vcpu) = cpsr; + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, new_spsr_value); + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ @@ -84,16 +179,18 @@ static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, fsr = &vcpu_cp15(vcpu, c5_DFSR); } - prepare_fault32(vcpu, PSR_AA32_MODE_ABT | PSR_AA32_A_BIT, vect_offset); + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); *far = addr; /* Give the guest an IMPLEMENTATION DEFINED exception */ is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) - *fsr = 1 << 9 | 0x34; - else - *fsr = 0x14; + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } } void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index f182b2380345..c6c2a9dde00c 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -805,6 +805,7 @@ static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, switch (treg) { case TIMER_REG_TVAL: val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; + val &= lower_32_bits(val); break; case TIMER_REG_CTL: @@ -850,7 +851,7 @@ static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, { switch (treg) { case TIMER_REG_TVAL: - timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + val; + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; break; case TIMER_REG_CTL: diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 70d3b449692c..1bb71acd53f2 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -105,6 +105,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) data = (data ^ mask) - mask; } + if (!vcpu->arch.mmio_decode.sixty_four) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); @@ -125,6 +128,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) unsigned long rt; int access_size; bool sign_extend; + bool sixty_four; if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ @@ -138,11 +142,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); + sixty_four = kvm_vcpu_dabt_issf(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; + vcpu->arch.mmio_decode.sixty_four = sixty_four; return 0; } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 0b32a904a1bb..a2777efb558e 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2147,7 +2147,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) if (!kvm->arch.pgd) return 0; trace_kvm_test_age_hva(hva); - return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); } void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8731dfeced8b..4c08fd009768 100644 --- a/virt/kvm/arm/pmu.c +++ 
b/virt/kvm/arm/pmu.c @@ -480,25 +480,45 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { + struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; - u64 type, enable, reg; - if (val == 0) + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) return; - enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + /* Weed out disabled counters */ + val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { + u64 type, reg; + if (!(val & BIT(i))) continue; - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) - & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) - && (enable & BIT(i))) { - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + + /* PMSWINC only applies to ... SW_INC! */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= ARMV8_PMU_EVTYPE_EVENT; + if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) + continue; + + /* increment this even SW_INC counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* no overflow on the low part */ + continue; + + if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { + /* increment the high counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - if (!reg) - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; + if (!reg) /* mark overflow on the high counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); + } else { + /* mark overflow on low counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 98c7360d9fb7..17920d1b350a 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -2475,7 +2475,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); coll_id = val & KVM_ITS_CTE_ICID_MASK; - if (target_addr >= atomic_read(&kvm->online_vcpus)) + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) return -EINVAL; collection = find_collection(its, coll_id); diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 35305d6e68cc..d8ef708a2ef6 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -64,7 +64,7 @@ static void async_pf_execute(struct work_struct *work) struct mm_struct *mm = apf->mm; struct kvm_vcpu *vcpu = apf->vcpu; unsigned long addr = apf->addr; - gva_t gva = apf->gva; + gpa_t cr2_or_gpa = apf->cr2_or_gpa; int locked = 1; might_sleep(); @@ -92,7 +92,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, gva); + trace_kvm_async_pf_completed(addr, cr2_or_gpa); if (swq_has_sleeper(&vcpu->wq)) swake_up_one(&vcpu->wq); @@ -165,8 +165,8 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) } } -int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, - struct kvm_arch_async_pf *arch) +int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, + unsigned long hva, struct kvm_arch_async_pf *arch) { struct kvm_async_pf *work; @@ -185,7 +185,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, work->wakeup_all = false; work->vcpu = vcpu; - work->gva = gva; + work->cr2_or_gpa = cr2_or_gpa; work->addr = hva; work->arch = *arch; work->mm = current->mm; diff --git 
a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 00268290dcbd..75b7ee1af1c3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1406,14 +1406,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr, size; size = PAGE_SIZE; - addr = gfn_to_hva(kvm, gfn); + addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); if (kvm_is_error_hva(addr)) return PAGE_SIZE; @@ -1821,26 +1821,72 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_page); -static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, - struct kvm_host_map *map) +void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache) +{ + if (pfn == 0) + return; + + if (cache) + cache->pfn = cache->gfn = 0; + + if (dirty) + kvm_release_pfn_dirty(pfn); + else + kvm_release_pfn_clean(pfn); +} + +static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn, + struct gfn_to_pfn_cache *cache, u64 gen) +{ + kvm_release_pfn(cache->pfn, cache->dirty, cache); + + cache->pfn = gfn_to_pfn_memslot(slot, gfn); + cache->gfn = gfn; + cache->dirty = false; + cache->generation = gen; +} + +static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn, + struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, + bool atomic) { kvm_pfn_t pfn; void *hva = NULL; struct page *page = KVM_UNMAPPED_PAGE; + struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn); + u64 gen = slots->generation; if (!map) return -EINVAL; - pfn = gfn_to_pfn_memslot(slot, gfn); + if (cache) { + if (!cache->pfn || cache->gfn != gfn || + cache->generation != gen) { + if (atomic) + return -EAGAIN; + kvm_cache_gfn_to_pfn(slot, gfn, cache, gen); + } + pfn = cache->pfn; + } else { + if (atomic) + return -EAGAIN; + pfn = gfn_to_pfn_memslot(slot, gfn); + } if (is_error_noslot_pfn(pfn)) return -EINVAL; if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - hva = kmap(page); + if (atomic) + hva = kmap_atomic(page); + else + hva = kmap(page); #ifdef CONFIG_HAS_IOMEM - } else { + } else if (!atomic) { hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB); + } else { + return -EINVAL; #endif } @@ -1855,14 +1901,25 @@ static int __kvm_map_gfn(struct kvm_memory_slot *slot, gfn_t gfn, return 0; } +int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool atomic) +{ + return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map, + cache, atomic); +} +EXPORT_SYMBOL_GPL(kvm_map_gfn); + int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map) { - return __kvm_map_gfn(kvm_vcpu_gfn_to_memslot(vcpu, gfn), gfn, map); + return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map, + NULL, false); } EXPORT_SYMBOL_GPL(kvm_vcpu_map); -void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, - bool dirty) +static void __kvm_unmap_gfn(struct kvm_memory_slot *memslot, + struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, + bool dirty, bool atomic) { if (!map) return; @@ -1870,23 +1927,45 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, if (!map->hva) return; - if (map->page != KVM_UNMAPPED_PAGE) - kunmap(map->page); + if (map->page != KVM_UNMAPPED_PAGE) { + if (atomic) + kunmap_atomic(map->hva); + else + kunmap(map->page); + } #ifdef CONFIG_HAS_IOMEM - else + else if (!atomic) memunmap(map->hva); + else + WARN_ONCE(1, 
"Unexpected unmapping in atomic context"); #endif - if (dirty) { - kvm_vcpu_mark_page_dirty(vcpu, map->gfn); - kvm_release_pfn_dirty(map->pfn); - } else { - kvm_release_pfn_clean(map->pfn); - } + if (dirty) + mark_page_dirty_in_slot(memslot, map->gfn); + + if (cache) + cache->dirty |= dirty; + else + kvm_release_pfn(map->pfn, dirty, NULL); map->hva = NULL; map->page = NULL; } + +int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, + struct gfn_to_pfn_cache *cache, bool dirty, bool atomic) +{ + __kvm_unmap_gfn(gfn_to_memslot(vcpu->kvm, map->gfn), map, + cache, dirty, atomic); + return 0; +} +EXPORT_SYMBOL_GPL(kvm_unmap_gfn); + +void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty) +{ + __kvm_unmap_gfn(kvm_vcpu_gfn_to_memslot(vcpu, map->gfn), map, NULL, + dirty, false); +} EXPORT_SYMBOL_GPL(kvm_vcpu_unmap); struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -2208,12 +2287,12 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2241,12 +2320,12 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT;