diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata index 2f726c9147522..3daecac48964f 100644 --- a/Documentation/ABI/testing/sysfs-ata +++ b/Documentation/ABI/testing/sysfs-ata @@ -107,13 +107,14 @@ Description: described in ATA8 7.16 and 7.17. Only valid if the device is not a PM. - pio_mode: (RO) Transfer modes supported by the device when - in PIO mode. Mostly used by PATA device. + pio_mode: (RO) PIO transfer mode used by the device. + Mostly used by PATA devices. - xfer_mode: (RO) Current transfer mode + xfer_mode: (RO) Current transfer mode. Mostly used by + PATA devices. - dma_mode: (RO) Transfer modes supported by the device when - in DMA mode. Mostly used by PATA device. + dma_mode: (RO) DMA transfer mode used by the device. + Mostly used by PATA devices. class: (RO) Device class. Can be "ata" for disk, "atapi" for packet device, "pmp" for PM, or diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 2ad01cad7f1c8..bcc974d276dc4 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -526,6 +526,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index 860fe651d6453..5e40b3f437f90 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -37,11 +37,7 @@ Pressure interface Pressure information for each resource is exported through the respective file in /proc/pressure/ -- cpu, memory, and io. -The format for CPU is as such:: - - some avg10=0.00 avg60=0.00 avg300=0.00 total=0 - -and for memory and IO:: +The format is as such:: some avg10=0.00 avg60=0.00 avg300=0.00 total=0 full avg10=0.00 avg60=0.00 avg300=0.00 total=0 @@ -58,6 +54,9 @@ situation from a state where some tasks are stalled but the CPU is still doing productive work. As such, time spent in this subset of the stall state is tracked separately and exported in the "full" averages. +CPU full is undefined at the system level, but has been reported +since 5.13, so it is set to zero for backward compatibility. + The ratios (in %) are tracked as recent trends over ten, sixty, and three hundred second windows, which gives insight into short term events as well as medium and long term trends. The total absolute stall time diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 8cbc711cda935..4df436e7c4177 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -17,3 +17,4 @@ are configurable at compile, boot or run time. 
special-register-buffer-data-sampling.rst core-scheduling.rst l1d_flush.rst + processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 0000000000000..9393c50b5afc9 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -0,0 +1,246 @@ +========================================= +Processor MMIO Stale Data Vulnerabilities +========================================= + +Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O +(MMIO) vulnerabilities that can expose data. The sequences of operations for +exposing data range from simple to very complex. Because most of the +vulnerabilities require the attacker to have access to MMIO, many environments +are not affected. System environments using virtualization where MMIO access is +provided to untrusted guests may need mitigation. These vulnerabilities are +not transient execution attacks. However, these vulnerabilities may propagate +stale data into core fill buffers where the data can subsequently be inferred +by an unmitigated transient execution attack. Mitigation for these +vulnerabilities includes a combination of microcode update and software +changes, depending on the platform and usage model. Some of these mitigations +are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or +those used to mitigate Special Register Buffer Data Sampling (SRBDS). + +Data Propagators +================ +Propagators are operations that result in stale data being copied or moved from +one microarchitectural buffer or register to another. Processor MMIO Stale Data +Vulnerabilities are operations that may result in stale data being directly +read into an architectural, software-visible state or sampled from a buffer or +register. + +Fill Buffer Stale Data Propagator (FBSDP) +----------------------------------------- +Stale data may propagate from fill buffers (FB) into the non-coherent portion +of the uncore on some non-coherent writes. Fill buffer propagation by itself +does not make stale data architecturally visible. Stale data must be propagated +to a location where it is subject to reading or sampling. + +Sideband Stale Data Propagator (SSDP) +------------------------------------- +The sideband stale data propagator (SSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. The sideband response buffer is +shared by all client cores. For non-coherent reads that go to sideband +destinations, the uncore logic returns 64 bytes of data to the core, including +both requested data and unrequested stale data, from a transaction buffer and +the sideband response buffer. As a result, stale data from the sideband +response and transaction buffers may now reside in a core fill buffer. + +Primary Stale Data Propagator (PSDP) +------------------------------------ +The primary stale data propagator (PSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. Similar to the sideband response +buffer, the primary response buffer is shared by all client cores. For some +processors, MMIO primary reads will return 64 bytes of data to the core fill +buffer including both requested data and unrequested stale data. This is +similar to the sideband stale data propagator. 
+ +Vulnerabilities +=============== +Device Register Partial Write (DRPW) (CVE-2022-21166) +----------------------------------------------------- +Some endpoint MMIO registers incorrectly handle writes that are smaller than +the register size. Instead of aborting the write or only copying the correct +subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than +specified by the write transaction may be written to the register. On +processors affected by FBSDP, this may expose stale data from the fill buffers +of the core that created the write transaction. + +Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) +---------------------------------------------------- +After propagators may have moved data around the uncore and copied stale data +into client core fill buffers, processors affected by MFBDS can leak data from +the fill buffer. It is limited to the client (including Intel Xeon server E3) +uncore implementation. + +Shared Buffers Data Read (SBDR) (CVE-2022-21123) +------------------------------------------------ +It is similar to Shared Buffer Data Sampling (SBDS) except that the data is +directly read into the architectural software-visible state. It is limited to +the client (including Intel Xeon server E3) uncore implementation. + +Affected Processors +=================== +Not all the CPUs are affected by all the variants. For instance, most +processors for the server market (excluding Intel Xeon E3 processors) are +impacted by only Device Register Partial Write (DRPW). + +Below is the list of affected Intel processors [#f1]_: + + =================== ============ ========= + Common name Family_Model Steppings + =================== ============ ========= + HASWELL_X 06_3FH 2,4 + SKYLAKE_L 06_4EH 3 + BROADWELL_X 06_4FH All + SKYLAKE_X 06_55H 3,4,6,7,11 + BROADWELL_D 06_56H 3,4,5 + SKYLAKE 06_5EH 3 + ICELAKE_X 06_6AH 4,5,6 + ICELAKE_D 06_6CH 1 + ICELAKE_L 06_7EH 5 + ATOM_TREMONT_D 06_86H All + LAKEFIELD 06_8AH 1 + KABYLAKE_L 06_8EH 9 to 12 + ATOM_TREMONT 06_96H 1 + ATOM_TREMONT_L 06_9CH 0 + KABYLAKE 06_9EH 9 to 13 + COMETLAKE 06_A5H 2,3,5 + COMETLAKE_L 06_A6H 0,1 + ROCKETLAKE 06_A7H 1 + =================== ============ ========= + +If a CPU is in the affected processor list, but not affected by a variant, it +is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later +section, mitigation largely remains the same for all the variants, i.e. to +clear the CPU fill buffers via VERW instruction. + +New bits in MSRs +================ +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +specific variants of Processor MMIO Stale Data vulnerabilities and mitigation +capability. + +MSR IA32_ARCH_CAPABILITIES +-------------------------- +Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale + data propagator (SSDP). +Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer + Stale Data Propagator (FBSDP). +Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data + Propagator (PSDP). +Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer + values as part of MD_CLEAR operations. Processors that do not + enumerate MDS_NO (meaning they are affected by MDS) but that do + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate + FB_CLEAR as part of their MD_CLEAR support. 
+Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS + bit can be set to cause the VERW instruction to not perform the + FB_CLEAR action. Not all processors that support FB_CLEAR will support + FB_CLEAR_CTRL. + +MSR IA32_MCU_OPT_CTRL +--------------------- +Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR +action. This may be useful to reduce the performance impact of FB_CLEAR in +cases where system software deems it warranted (for example, when performance +is more critical, or the untrusted software has no MMIO access). Note that +FB_CLEAR_DIS has no impact on enumeration (for example, it does not change +FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors +that enumerate FB_CLEAR. + +Mitigation +========== +Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the +same mitigation strategy to force the CPU to clear the affected buffers before +an attacker can extract the secrets. + +This is achieved by using the otherwise unused and obsolete VERW instruction in +combination with a microcode update. The microcode clears the affected CPU +buffers when the VERW instruction is executed. + +Kernel reuses the MDS function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +On MDS affected CPUs, the kernel already invokes CPU buffer clear on +kernel/userspace, hypervisor/guest and C-state (idle) transitions. No +additional mitigation is needed on such CPUs. + +For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker +with MMIO capability. Therefore, VERW is not required for kernel/userspace. For +virtualization case, VERW is only needed at VMENTER for a guest with MMIO +capability. + +Mitigation points +----------------- +Return to user space +^^^^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation +needed. + +C-State transition +^^^^^^^^^^^^^^^^^^ +Control register writes by CPU during C-state transition can propagate data +from fill buffer to uncore buffers. Execute VERW before C-state transition to +clear CPU fill buffers. + +Guest entry point +^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise +execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by +MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO +Stale Data vulnerabilities, so there is no need to execute VERW for such guests. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control the Processor MMIO Stale Data +mitigations at boot time with the option "mmio_stale_data=". The valid +arguments for this option are: + + ========== ================================================================= + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and when entering a VM. Idle transitions are + protected as well. It does not automatically disable SMT. + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the + complete mitigation. + off Disables mitigation completely. + ========== ================================================================= + +If the CPU is affected and mmio_stale_data=off is not supplied on the kernel +command line, then the kernel selects the appropriate mitigation. 
+ +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable, but microcode is not updated. The + mitigation is enabled on a best effort basis. + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +If the processor is vulnerable then the following information is appended to +the above information: + + ======================== =========================================== + 'SMT vulnerable' SMT is enabled + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== =========================================== + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed4..c4893782055b4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3105,6 +3105,7 @@ kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] no_uaccess_flush [PPC] + mmio_stale_data=off [X86] Exceptions: This does not have any effect on @@ -3126,6 +3127,7 @@ Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -3135,6 +3137,40 @@ log everything. Information is printed at KERN_DEBUG so loglevel=8 may also need to be specified. + mmio_stale_data= + [X86,INTEL] Control mitigation for the Processor + MMIO Stale Data vulnerabilities. + + Processor MMIO Stale Data is a class of + vulnerabilities that may expose data after an MMIO + operation. Exposed data could originate or end in + the same CPU buffers as affected by MDS and TAA. + Therefore, similar to MDS and TAA, the mitigation + is to clear the affected CPU buffers. + + This parameter controls the mitigation. The + options are: + + full - Enable mitigation on vulnerable CPUs + + full,nosmt - Enable mitigation and disable SMT on + vulnerable CPUs. + + off - Unconditionally disable mitigation + + On MDS or TAA affected machines, + mmio_stale_data=off can be prevented by an active + MDS or TAA mitigation as these vulnerabilities are + mitigated with the same mechanism so in order to + disable this mitigation, you need to specify + mds=off and tsx_async_abort=off too. + + Not specifying this option is equivalent to + mmio_stale_data=full. + + For details see: + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst + module.sig_enforce [KNL] When CONFIG_MODULE_SIG is set, this means that modules without (valid) signatures will fail to load. 
diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst index c21b5823f1261..2cf5bae620367 100644 --- a/Documentation/admin-guide/mm/index.rst +++ b/Documentation/admin-guide/mm/index.rst @@ -32,6 +32,7 @@ the Linux memory management. idle_page_tracking ksm memory-hotplug + multigen_lru nommu-mmap numa_memory_policy numaperf diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst new file mode 100644 index 0000000000000..6355f2b5019d3 --- /dev/null +++ b/Documentation/admin-guide/mm/multigen_lru.rst @@ -0,0 +1,156 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= +The multi-gen LRU is an alternative LRU implementation that optimizes +page reclaim and improves performance under memory pressure. Page +reclaim decides the kernel's caching policy and ability to overcommit +memory. It directly impacts the kswapd CPU usage and RAM efficiency. + +Quick start +=========== +Build the kernel with the following configurations. + +* ``CONFIG_LRU_GEN=y`` +* ``CONFIG_LRU_GEN_ENABLED=y`` + +All set! + +Runtime options +=============== +``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the +following subsections. + +Kill switch +----------- +``enabled`` accepts different values to enable or disable the +following components. Its default value depends on +``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled +unless some of them have unforeseen side effects. Writing to +``enabled`` has no effect when a component is not supported by the +hardware, and valid values will be accepted even when the main switch +is off. + +====== =============================================================== +Values Components +====== =============================================================== +0x0001 The main switch for the multi-gen LRU. +0x0002 Clearing the accessed bit in leaf page table entries in large + batches, when MMU sets it (e.g., on x86). This behavior can + theoretically worsen lock contention (mmap_lock). If it is + disabled, the multi-gen LRU will suffer a minor performance + degradation for workloads that contiguously map hot pages, + whose accessed bits can be otherwise cleared by fewer larger + batches. +0x0004 Clearing the accessed bit in non-leaf page table entries as + well, when MMU sets it (e.g., on x86). This behavior was not + verified on x86 varieties other than Intel and AMD. If it is + disabled, the multi-gen LRU will suffer a negligible + performance degradation. +[yYnN] Apply to all the components above. +====== =============================================================== + +E.g., +:: + + echo y >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0007 + echo 5 >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0005 + +Thrashing prevention +-------------------- +Personal computers are more sensitive to thrashing because it can +cause janks (lags when rendering UI) and negatively impact user +experience. The multi-gen LRU offers thrashing prevention to the +majority of laptop and desktop users who do not have ``oomd``. + +Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of +``N`` milliseconds from getting evicted. The OOM killer is triggered +if this working set cannot be kept in memory. In other words, this +option works as an adjustable pressure relief valve, and when open, it +terminates applications that are hopefully not being used. 
+ +Based on the average human detectable lag (~100ms), ``N=1000`` usually +eliminates intolerable janks due to thrashing. Larger values like +``N=3000`` make janks less noticeable at the risk of premature OOM +kills. + +The default value ``0`` means disabled. + +Experimental features +===================== +``/sys/kernel/debug/lru_gen`` accepts commands described in the +following subsections. Multiple command lines are supported, so does +concatenation with delimiters ``,`` and ``;``. + +``/sys/kernel/debug/lru_gen_full`` provides additional stats for +debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from +evicted generations in this file. + +Working set estimation +---------------------- +Working set estimation measures how much memory an application needs +in a given time interval, and it is usually done with little impact on +the performance of the application. E.g., data centers want to +optimize job scheduling (bin packing) to improve memory utilizations. +When a new job comes in, the job scheduler needs to find out whether +each server it manages can allocate a certain amount of memory for +this new job before it can pick a candidate. To do so, the job +scheduler needs to estimate the working sets of the existing jobs. + +When it is read, ``lru_gen`` returns a histogram of numbers of pages +accessed over different time intervals for each memcg and node. +``MAX_NR_GENS`` decides the number of bins for each histogram. The +histograms are noncumulative. +:: + + memcg memcg_id memcg_path + node node_id + min_gen_nr age_in_ms nr_anon_pages nr_file_pages + ... + max_gen_nr age_in_ms nr_anon_pages nr_file_pages + +Each bin contains an estimated number of pages that have been accessed +within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages +and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of +the former is the largest and that of the latter is the smallest. + +Users can write ``+ memcg_id node_id max_gen_nr +[can_swap [force_scan]]`` to ``lru_gen`` to create a new generation +``max_gen_nr+1``. ``can_swap`` defaults to the swap setting and, if it +is set to ``1``, it forces the scan of anon pages when swap is off, +and vice versa. ``force_scan`` defaults to ``1`` and, if it is set to +``0``, it employs heuristics to reduce the overhead, which is likely +to reduce the coverage as well. + +A typical use case is that a job scheduler writes to ``lru_gen`` at a +certain time interval to create new generations, and it ranks the +servers it manages based on the sizes of their cold pages defined by +this time interval. + +Proactive reclaim +----------------- +Proactive reclaim induces page reclaim when there is no memory +pressure. It usually targets cold pages only. E.g., when a new job +comes in, the job scheduler wants to proactively reclaim cold pages on +the server it selected to improve the chance of successfully landing +this new job. + +Users can write ``- memcg_id node_id min_gen_nr [swappiness +[nr_to_reclaim]]`` to ``lru_gen`` to evict generations less than or +equal to ``min_gen_nr``. Note that ``min_gen_nr`` should be less than +``max_gen_nr-1`` as ``max_gen_nr`` and ``max_gen_nr-1`` are not fully +aged and therefore cannot be evicted. ``swappiness`` overrides the +default value in ``/proc/sys/vm/swappiness``. ``nr_to_reclaim`` limits +the number of pages to evict. + +A typical use case is that a job scheduler writes to ``lru_gen`` +before it tries to land a new job on a server. 
If it fails to +materialize enough cold pages because of the overestimation, it +retries on the next server according to the ranking result obtained +from the working set estimation step. This less forceful approach +limits the impacts on the existing jobs. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 1144ea3229a37..e9c18dabc5523 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -994,6 +994,9 @@ This is a directory, with the following entries: * ``boot_id``: a UUID generated the first time this is retrieved, and unvarying after that; +* ``uuid``: a UUID generated every time this is retrieved (this can + thus be used to generate UUIDs at will); + * ``entropy_avail``: the pool's entropy count, in bits; * ``poolsize``: the entropy pool size, in bits; @@ -1001,10 +1004,7 @@ This is a directory, with the following entries: * ``urandom_min_reseed_secs``: obsolete (used to determine the minimum number of seconds between urandom pool reseeding). This file is writable for compatibility purposes, but writing to it has no effect - on any RNG behavior. - -* ``uuid``: a UUID generated every time this is retrieved (this can - thus be used to generate UUIDs at will); + on any RNG behavior; * ``write_wakeup_threshold``: when the entropy count drops below this (as a number of bits), processes waiting to write to ``/dev/random`` diff --git a/Documentation/conf.py b/Documentation/conf.py index 072ee31a301dc..934727e23e0eb 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -161,7 +161,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 0cebaaefda032..419c3b2ac5a6f 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -72,6 +72,7 @@ examples: dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; rotation = <270>; + backlight = <&backlight>; }; }; diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt index 146e554b3c676..2a80e272cd666 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-altera.txt +++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt @@ -9,8 +9,9 @@ Required properties: - The second cell is reserved and is currently unused. - gpio-controller : Marks the device node as a GPIO controller. - interrupt-controller: Mark the device node as an interrupt controller -- #interrupt-cells : Should be 1. The interrupt type is fixed in the hardware. +- #interrupt-cells : Should be 2. The interrupt type is fixed in the hardware. - The first cell is the GPIO offset number within the GPIO controller. + - The second cell is the interrupt trigger type and level flags. - interrupts: Specify the interrupt. - altr,interrupt-type: Specifies the interrupt trigger type the GPIO hardware is synthesized. 
This field is required if the Altera GPIO controller @@ -38,6 +39,6 @@ gpio_altr: gpio@ff200000 { altr,interrupt-type = ; #gpio-cells = <2>; gpio-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; interrupt-controller; }; diff --git a/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml b/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml new file mode 100644 index 0000000000000..358b262431fc5 --- /dev/null +++ b/Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- + +$id: http://devicetree.org/schemas/hwmon/nuvoton,nct6775.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Nuvoton NCT6775 and compatible Super I/O chips + +maintainers: + - Zev Weiss + +properties: + compatible: + enum: + - nuvoton,nct6106 + - nuvoton,nct6116 + - nuvoton,nct6775 + - nuvoton,nct6776 + - nuvoton,nct6779 + - nuvoton,nct6791 + - nuvoton,nct6792 + - nuvoton,nct6793 + - nuvoton,nct6795 + - nuvoton,nct6796 + - nuvoton,nct6797 + - nuvoton,nct6798 + + reg: + maxItems: 1 + + nuvoton,tsi-channel-mask: + description: + Bitmask indicating which TSI temperature sensor channels are + active. LSB is TSI0, bit 1 is TSI1, etc. + $ref: /schemas/types.yaml#/definitions/uint32 + maximum: 0xff + default: 0 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + superio@4d { + compatible = "nuvoton,nct6779"; + reg = <0x4d>; + nuvoton,tsi-channel-mask = <0x03>; + }; + }; diff --git a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml index 61dd5af80db67..37402c370fbbc 100644 --- a/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/mt6315-regulator.yaml @@ -31,7 +31,7 @@ properties: $ref: "regulator.yaml#" properties: - regulator-name: + regulator-compatible: pattern: "^vbuck[1-4]$" additionalProperties: false @@ -55,7 +55,7 @@ examples: regulator-min-microvolt = <300000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; - regulator-allowed-modes = <0 1 2 4>; + regulator-allowed-modes = <0 1 2>; }; vbuck3 { @@ -63,7 +63,7 @@ examples: regulator-min-microvolt = <300000>; regulator-max-microvolt = <1193750>; regulator-enable-ramp-delay = <256>; - regulator-allowed-modes = <0 1 2 4>; + regulator-allowed-modes = <0 1 2>; }; }; }; diff --git a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml index 5b693a2d049c1..d55b861db605f 100644 --- a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml +++ b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml @@ -23,11 +23,13 @@ properties: reg: description: - Should contain the address ranges for memory regions SRAM, CFG, and - L1TCM. + Should contain the address ranges for memory regions SRAM, CFG, and, + on some platforms, L1TCM. 
+ minItems: 2 maxItems: 3 reg-names: + minItems: 2 items: - const: sram - const: cfg @@ -47,16 +49,30 @@ required: - reg - reg-names -if: - properties: - compatible: - enum: - - mediatek,mt8183-scp - - mediatek,mt8192-scp -then: - required: - - clocks - - clock-names +allOf: + - if: + properties: + compatible: + enum: + - mediatek,mt8183-scp + - mediatek,mt8192-scp + then: + required: + - clocks + - clock-names + + - if: + properties: + compatible: + enum: + - mediatek,mt8183-scp + - mediatek,mt8186-scp + then: + properties: + reg: + maxItems: 2 + reg-names: + maxItems: 2 additionalProperties: type: object @@ -76,10 +92,10 @@ additionalProperties: examples: - | - #include + #include scp@10500000 { - compatible = "mediatek,mt8183-scp"; + compatible = "mediatek,mt8192-scp"; reg = <0x10500000 0x80000>, <0x10700000 0x8000>, <0x10720000 0xe0000>; diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml index b32457c2fc0b0..3361218e278f3 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smd-rpm.yaml @@ -34,6 +34,7 @@ properties: - qcom,rpm-ipq6018 - qcom,rpm-msm8226 - qcom,rpm-msm8916 + - qcom,rpm-msm8936 - qcom,rpm-msm8953 - qcom,rpm-msm8974 - qcom,rpm-msm8976 diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml index 5a60fba14bba0..44d08aa3fd85d 100644 --- a/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml +++ b/Documentation/devicetree/bindings/spi/qcom,spi-qcom-qspi.yaml @@ -49,6 +49,7 @@ properties: maxItems: 2 interconnect-names: + minItems: 1 items: - const: qspi-config - const: qspi-memory diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst index 96668dca753a8..372bdb4d04c6d 100644 --- a/Documentation/driver-api/thermal/intel_dptf.rst +++ b/Documentation/driver-api/thermal/intel_dptf.rst @@ -4,7 +4,7 @@ Intel(R) Dynamic Platform and Thermal Framework Sysfs Interface =============================================================== -:Copyright: |copy| 2022 Intel Corporation +:Copyright: © 2022 Intel Corporation :Author: Srinivas Pandruvada diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst index e7e8f1640f457..00d8c46ef9e04 100644 --- a/Documentation/hwmon/asus_ec_sensors.rst +++ b/Documentation/hwmon/asus_ec_sensors.rst @@ -4,18 +4,22 @@ Kernel driver asus_ec_sensors ================================= Supported boards: - * PRIME X570-PRO, - * Pro WS X570-ACE, - * ROG CROSSHAIR VIII DARK HERO, + * PRIME X470-PRO + * PRIME X570-PRO + * Pro WS X570-ACE + * ProArt X570-CREATOR WIFI + * ROG CROSSHAIR VIII DARK HERO * ROG CROSSHAIR VIII HERO (WI-FI) - * ROG CROSSHAIR VIII FORMULA, - * ROG CROSSHAIR VIII HERO, - * ROG CROSSHAIR VIII IMPACT, - * ROG STRIX B550-E GAMING, - * ROG STRIX B550-I GAMING, - * ROG STRIX X570-E GAMING, - * ROG STRIX X570-F GAMING, + * ROG CROSSHAIR VIII FORMULA + * ROG CROSSHAIR VIII HERO + * ROG CROSSHAIR VIII IMPACT + * ROG STRIX B550-E GAMING + * ROG STRIX B550-I GAMING + * ROG STRIX X570-E GAMING + * ROG STRIX X570-E GAMING WIFI II + * ROG STRIX X570-F GAMING * ROG STRIX X570-I GAMING + * ROG STRIX Z690-A GAMING WIFI D4 Authors: - Eugene Shalygin @@ -52,3 +56,5 @@ Module Parameters the path is mostly identical for them). 
If ASUS changes this path in a future BIOS update, this parameter can be used to override the stored in the driver value until it gets updated. + A special string ":GLOBAL_LOCK" can be passed to use the ACPI + global lock instead of a dedicated mutex. diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index fb496b2ebfd38..92d3432460d75 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -77,7 +77,7 @@ as you intend it to. The maintainer will thank you if you write your patch description in a form which can be easily pulled into Linux's source code management -system, ``git``, as a "commit log". See :ref:`explicit_in_reply_to`. +system, ``git``, as a "commit log". See :ref:`the_canonical_patch_format`. Solve only one problem per patch. If your description starts to get long, that's a sign that you probably need to split up your patch. diff --git a/Documentation/sound/alsa-configuration.rst b/Documentation/sound/alsa-configuration.rst index 34888d4fc4a83..21ab5e6f7062f 100644 --- a/Documentation/sound/alsa-configuration.rst +++ b/Documentation/sound/alsa-configuration.rst @@ -2246,7 +2246,7 @@ implicit_fb Apply the generic implicit feedback sync mode. When this is set and the playback stream sync mode is ASYNC, the driver tries to tie an adjacent ASYNC capture stream as the implicit feedback - source. + source. This is equivalent with quirk_flags bit 17. use_vmalloc Use vmalloc() for allocations of the PCM buffers (default: yes). For architectures with non-coherent memory like ARM or MIPS, the @@ -2288,6 +2288,8 @@ quirk_flags * bit 14: Ignore errors for mixer access * bit 15: Support generic DSD raw U32_BE format * bit 16: Set up the interface at first like UAC1 + * bit 17: Apply the generic implicit feedback sync mode + * bit 18: Don't apply implicit feedback sync mode This module supports multiple devices, autoprobe and hotplugging. diff --git a/Documentation/tools/rtla/Makefile b/Documentation/tools/rtla/Makefile index 9f2b84af1a6c7..093af6d7a0e93 100644 --- a/Documentation/tools/rtla/Makefile +++ b/Documentation/tools/rtla/Makefile @@ -17,9 +17,21 @@ DOC_MAN1 = $(addprefix $(OUTPUT),$(_DOC_MAN1)) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) RST2MAN_OPTS += --verbose +TEST_RST2MAN = $(shell sh -c "rst2man --version > /dev/null 2>&1 || echo n") + $(OUTPUT)%.1: %.rst ifndef RST2MAN_DEP - $(error "rst2man not found, but required to generate man pages") + $(info ********************************************) + $(info ** NOTICE: rst2man not found) + $(info **) + $(info ** Consider installing the latest rst2man from your) + $(info ** distribution, e.g., 'dnf install python3-docutils' on Fedora,) + $(info ** or from source:) + $(info **) + $(info ** https://docutils.sourceforge.io/docs/dev/repository.html ) + $(info **) + $(info ********************************************) + $(error NOTICE: rst2man required to generate man pages) endif rst2man $(RST2MAN_OPTS) $< > $@ diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index f35552ff19ba8..b68e7a51009f8 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -267,8 +267,8 @@ restrict such paths with dedicated ruleset flags. Ruleset layers -------------- -There is a limit of 64 layers of stacked rulesets. 
This can be an issue for a -task willing to enforce a new ruleset in complement to its 64 inherited +There is a limit of 16 layers of stacked rulesets. This can be an issue for a +task willing to enforce a new ruleset in complement to its 16 inherited rulesets. Once this limit is reached, sys_landlock_restrict_self() returns E2BIG. It is then strongly suggested to carefully build rulesets once in the life of a thread, especially for applications able to launch other applications diff --git a/Documentation/userspace-api/media/lirc.h.rst.exceptions b/Documentation/userspace-api/media/lirc.h.rst.exceptions index 913d17b498313..1aeb7d7afe13b 100644 --- a/Documentation/userspace-api/media/lirc.h.rst.exceptions +++ b/Documentation/userspace-api/media/lirc.h.rst.exceptions @@ -30,6 +30,8 @@ ignore define LIRC_CAN_REC ignore define LIRC_CAN_SEND_MASK ignore define LIRC_CAN_REC_MASK +ignore define LIRC_CAN_SET_REC_FILTER +ignore define LIRC_CAN_NOTIFY_DECODE # Obsolete ioctls diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index 44365c4574a37..b484343002269 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -25,6 +25,7 @@ algorithms. If you are looking for advice on simply allocating memory, see the ksm memory-model mmu_notifier + multigen_lru numa overcommit-accounting page_migration diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst new file mode 100644 index 0000000000000..bc8eaf1b956c8 --- /dev/null +++ b/Documentation/vm/multigen_lru.rst @@ -0,0 +1,159 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= +The multi-gen LRU is an alternative LRU implementation that optimizes +page reclaim and improves performance under memory pressure. Page +reclaim decides the kernel's caching policy and ability to overcommit +memory. It directly impacts the kswapd CPU usage and RAM efficiency. + +Design overview +=============== +Objectives +---------- +The design objectives are: + +* Good representation of access recency +* Try to profit from spatial locality +* Fast paths to make obvious choices +* Simple self-correcting heuristics + +The representation of access recency is at the core of all LRU +implementations. In the multi-gen LRU, each generation represents a +group of pages with similar access recency. Generations establish a +(time-based) common frame of reference and therefore help make better +choices, e.g., between different memcgs on a computer or different +computers in a data center (for job scheduling). + +Exploiting spatial locality improves efficiency when gathering the +accessed bit. A rmap walk targets a single page and does not try to +profit from discovering a young PTE. A page table walk can sweep all +the young PTEs in an address space, but the address space can be too +sparse to make a profit. The key is to optimize both methods and use +them in combination. + +Fast paths reduce code complexity and runtime overhead. Unmapped pages +do not require TLB flushes; clean pages do not require writeback. +These facts are only helpful when other conditions, e.g., access +recency, are similar. With generations as a common frame of reference, +additional factors stand out. But obvious choices might not be good +choices; thus self-correction is necessary. + +The benefits of simple self-correcting heuristics are self-evident. +Again, with generations as a common frame of reference, this becomes +attainable. 
Specifically, pages in the same generation can be +categorized based on additional factors, and a feedback loop can +statistically compare the refault percentages across those categories +and infer which of them are better choices. + +Assumptions +----------- +The protection of hot pages and the selection of cold pages are based +on page access channels and patterns. There are two access channels: + +* Accesses through page tables +* Accesses through file descriptors + +The protection of the former channel is by design stronger because: + +1. The uncertainty in determining the access patterns of the former + channel is higher due to the approximation of the accessed bit. +2. The cost of evicting the former channel is higher due to the TLB + flushes required and the likelihood of encountering the dirty bit. +3. The penalty of underprotecting the former channel is higher because + applications usually do not prepare themselves for major page + faults like they do for blocked I/O. E.g., GUI applications + commonly use dedicated I/O threads to avoid blocking rendering + threads. + +There are also two access patterns: + +* Accesses exhibiting temporal locality +* Accesses not exhibiting temporal locality + +For the reasons listed above, the former channel is assumed to follow +the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is +present, and the latter channel is assumed to follow the latter +pattern unless outlying refaults have been observed. + +Workflow overview +================= +Evictable pages are divided into multiple generations for each +``lruvec``. The youngest generation number is stored in +``lrugen->max_seq`` for both anon and file types as they are aged on +an equal footing. The oldest generation numbers are stored in +``lrugen->min_seq[]`` separately for anon and file types as clean file +pages can be evicted regardless of swap constraints. These three +variables are monotonically increasing. + +Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` +bits in order to fit into the gen counter in ``folio->flags``. Each +truncated generation number is an index to ``lrugen->lists[]``. The +sliding window technique is used to track at least ``MIN_NR_GENS`` and +at most ``MAX_NR_GENS`` generations. The gen counter stores a value +within ``[1, MAX_NR_GENS]`` while a page is on one of +``lrugen->lists[]``; otherwise it stores zero. + +Each generation is divided into multiple tiers. Tiers represent +different ranges of numbers of accesses through file descriptors. A +page accessed ``N`` times through file descriptors is in tier +``order_base_2(N)``. In contrast to moving across generations, which +requires the LRU lock, moving across tiers only involves atomic +operations on ``folio->flags`` and therefore has a negligible cost. A +feedback loop modeled after the PID controller monitors refaults over +all the tiers from anon and file types and decides which tiers from +which types to evict or protect. + +There are two conceptually independent procedures: the aging and the +eviction. They form a closed-loop system, i.e., the page reclaim. + +Aging +----- +The aging produces young generations. Given an ``lruvec``, it +increments ``max_seq`` when ``max_seq-min_seq+1`` approaches +``MIN_NR_GENS``. The aging promotes hot pages to the youngest +generation when it finds them accessed through page tables; the +demotion of cold pages happens consequently when it increments +``max_seq``. The aging uses page table walks and rmap walks to find +young PTEs. 
For the former, it iterates ``lruvec_memcg()->mm_list`` +and calls ``walk_page_range()`` with each ``mm_struct`` on this list +to scan PTEs, and after each iteration, it increments ``max_seq``. For +the latter, when the eviction walks the rmap and finds a young PTE, +the aging scans the adjacent PTEs. For both, on finding a young PTE, +the aging clears the accessed bit and updates the gen counter of the +page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``. + +Eviction +-------- +The eviction consumes old generations. Given an ``lruvec``, it +increments ``min_seq`` when ``lrugen->lists[]`` indexed by +``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to +evict from, it first compares ``min_seq[]`` to select the older type. +If both types are equally old, it selects the one whose first tier has +a lower refault percentage. The first tier contains single-use +unmapped clean pages, which are the best bet. The eviction sorts a +page according to its gen counter if the aging has found this page +accessed through page tables and updated its gen counter. It also +moves a page to the next generation, i.e., ``min_seq+1``, if this page +was accessed multiple times through file descriptors and the feedback +loop has detected outlying refaults from the tier this page is in. To +do this, the feedback loop uses the first tier as the baseline, for +the reason stated earlier. + +Summary +------- +The multi-gen LRU can be disassembled into the following parts: + +* Generations +* Page table walks +* Rmap walks +* Bloom filters +* PID controller + +The aging and the eviction form a producer-consumer model; +specifically, the latter drives the former by the sliding window over +generations. Within the aging, rmap walks drive page table walks by +inserting hot densely populated page tables to the Bloom filters. +Within the eviction, the PID controller uses refaults as the feedback +to select types to evict and tiers to protect. diff --git a/MAINTAINERS b/MAINTAINERS index f468864fd268c..51a6ba822792b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13536,12 +13536,21 @@ M: Samuel Mendoza-Jonas S: Maintained F: net/ncsi/ -NCT6775 HARDWARE MONITOR DRIVER +NCT6775 HARDWARE MONITOR DRIVER - CORE & PLATFORM DRIVER M: Guenter Roeck L: linux-hwmon@vger.kernel.org S: Maintained F: Documentation/hwmon/nct6775.rst -F: drivers/hwmon/nct6775.c +F: drivers/hwmon/nct6775-core.c +F: drivers/hwmon/nct6775-platform.c +F: drivers/hwmon/nct6775.h + +NCT6775 HARDWARE MONITOR DRIVER - I2C DRIVER +M: Zev Weiss +L: linux-hwmon@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/hwmon/nuvoton,nct6775.yaml +F: drivers/hwmon/nct6775-i2c.c NETDEVSIM M: Jakub Kicinski diff --git a/Makefile b/Makefile index 7d5b0bfe79602..997d229c44e09 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -pf2 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 31c4fdc4a4baa..8a2cb732b09ed 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1376,6 +1376,14 @@ config DYNAMIC_SIGFRAME config HAVE_ARCH_NODE_DEV_GROUP bool +config ARCH_HAS_NONLEAF_PMD_YOUNG + bool + help + Architectures that select this option are capable of setting the + accessed bit in non-leaf PMD entries when using them as part of linear + address translations. Page table walkers that clear the accessed bit + may use this capability to reduce their search space. 
+ source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h index 18f48a6f2ff6d..8f3f5eecba28b 100644 --- a/arch/alpha/include/asm/page.h +++ b/arch/alpha/include/asm/page.h @@ -18,7 +18,7 @@ extern void clear_page(void *page); #define clear_user_page(page, vaddr, pg) clear_page(page) #define alloc_zeroed_user_highpage_movable(vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr) + alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE extern void copy_page(void * _to, void * _from); diff --git a/arch/alpha/include/asm/timex.h b/arch/alpha/include/asm/timex.h index b565cc6f408e9..f89798da8a147 100644 --- a/arch/alpha/include/asm/timex.h +++ b/arch/alpha/include/asm/timex.h @@ -28,5 +28,6 @@ static inline cycles_t get_cycles (void) __asm__ __volatile__ ("rpcc %0" : "=r"(ret)); return ret; } +#define get_cycles get_cycles #endif diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3515bc4f16a4f..00ff721da300e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -490,3 +490,4 @@ 558 common process_mrelease sys_process_mrelease 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall +561 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb.dts b/arch/arm/boot/dts/aspeed-ast2600-evb.dts index b7eb552640cbf..788448cdd6b3f 100644 --- a/arch/arm/boot/dts/aspeed-ast2600-evb.dts +++ b/arch/arm/boot/dts/aspeed-ast2600-evb.dts @@ -103,7 +103,7 @@ &mac0 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-rxid"; phy-handle = <ðphy0>; pinctrl-names = "default"; @@ -114,7 +114,7 @@ &mac1 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rgmii-rxid"; phy-handle = <ðphy1>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/bcm2835-rpi-b.dts b/arch/arm/boot/dts/bcm2835-rpi-b.dts index 1b63d6b19750b..25d87212cefd3 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-b.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-b.dts @@ -53,18 +53,17 @@ "GPIO18", "NC", /* GPIO19 */ "NC", /* GPIO20 */ - "GPIO21", + "CAM_GPIO0", "GPIO22", "GPIO23", "GPIO24", "GPIO25", "NC", /* GPIO26 */ - "CAM_GPIO0", - /* Binary number representing build/revision */ - "CONFIG0", - "CONFIG1", - "CONFIG2", - "CONFIG3", + "GPIO27", + "GPIO28", + "GPIO29", + "GPIO30", + "GPIO31", "NC", /* GPIO32 */ "NC", /* GPIO33 */ "NC", /* GPIO34 */ diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts index 243236bc1e00b..8b043ab62dc83 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts @@ -74,16 +74,18 @@ "GPIO27", "SDA0", "SCL0", - "NC", /* GPIO30 */ - "NC", /* GPIO31 */ - "NC", /* GPIO32 */ - "NC", /* GPIO33 */ - "NC", /* GPIO34 */ - "NC", /* GPIO35 */ - "NC", /* GPIO36 */ - "NC", /* GPIO37 */ - "NC", /* GPIO38 */ - "NC", /* GPIO39 */ + /* Used by BT module */ + "CTS0", + "RTS0", + "TXD0", + "RXD0", + /* Used by Wifi */ + "SD1_CLK", + "SD1_CMD", + "SD1_DATA0", + "SD1_DATA1", + "SD1_DATA2", + "SD1_DATA3", "CAM_GPIO1", /* GPIO40 */ "WL_ON", /* GPIO41 */ "NC", /* GPIO42 */ diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index e12938baaf12c..c263f5b48b96b 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -45,7 +45,7 @@ #gpio-cells = <2>; 
gpio-line-names = "BT_ON", "WL_ON", - "STATUS_LED_R", + "PWR_LED_R", "LAN_RUN", "", "CAM_GPIO0", diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts index 588d9411ceb61..3dfce4312dfc4 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-cm3-io3.dts @@ -63,8 +63,8 @@ "GPIO43", "GPIO44", "GPIO45", - "GPIO46", - "GPIO47", + "SMPS_SCL", + "SMPS_SDA", /* Used by eMMC */ "SD_CLK_R", "SD_CMD_R", diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 603c700c706f2..65f8a759f1e31 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -455,7 +455,7 @@ reg = <0x180 0x4>; }; - pinctrl: pin-controller@1c0 { + pinctrl: pinctrl@1c0 { compatible = "brcm,bcm4708-pinmux"; reg = <0x1c0 0x24>; reg-names = "cru_gpio_control"; diff --git a/arch/arm/boot/dts/exynos5250-smdk5250.dts b/arch/arm/boot/dts/exynos5250-smdk5250.dts index 21fbbf3d8684d..71293749ac481 100644 --- a/arch/arm/boot/dts/exynos5250-smdk5250.dts +++ b/arch/arm/boot/dts/exynos5250-smdk5250.dts @@ -129,7 +129,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@50 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x50>; }; @@ -289,7 +289,7 @@ samsung,i2c-max-bus-freq = <20000>; eeprom@51 { - compatible = "samsung,s524ad0xd1"; + compatible = "samsung,s524ad0xd1", "atmel,24c128"; reg = <0x51>; }; diff --git a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts index b4a9523e325b4..864dc5018451f 100644 --- a/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts +++ b/arch/arm/boot/dts/imx6dl-eckelmann-ci4x10.dts @@ -297,7 +297,11 @@ phy-mode = "rmii"; phy-reset-gpios = <&gpio1 18 GPIO_ACTIVE_LOW>; phy-handle = <&phy>; - clocks = <&clks IMX6QDL_CLK_ENET>, <&clks IMX6QDL_CLK_ENET>, <&rmii_clk>; + clocks = <&clks IMX6QDL_CLK_ENET>, + <&clks IMX6QDL_CLK_ENET>, + <&rmii_clk>, + <&clks IMX6QDL_CLK_ENET_REF>; + clock-names = "ipg", "ahb", "ptp", "enet_out"; status = "okay"; mdio { diff --git a/arch/arm/boot/dts/imx6qdl-colibri.dtsi b/arch/arm/boot/dts/imx6qdl-colibri.dtsi index 4e2a309c93fa8..1e86b38147080 100644 --- a/arch/arm/boot/dts/imx6qdl-colibri.dtsi +++ b/arch/arm/boot/dts/imx6qdl-colibri.dtsi @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ OR MIT /* - * Copyright 2014-2020 Toradex + * Copyright 2014-2022 Toradex * Copyright 2012 Freescale Semiconductor, Inc. * Copyright 2011 Linaro Ltd. 
*/ @@ -132,7 +132,7 @@ clock-frequency = <100000>; pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c2>; - pinctrl-0 = <&pinctrl_i2c2_gpio>; + pinctrl-1 = <&pinctrl_i2c2_gpio>; scl-gpios = <&gpio2 30 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; sda-gpios = <&gpio3 16 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; status = "okay"; @@ -488,7 +488,7 @@ >; }; - pinctrl_i2c2_gpio: i2c2grp { + pinctrl_i2c2_gpio: i2c2gpiogrp { fsl,pins = < MX6QDL_PAD_EIM_EB2__GPIO2_IO30 0x4001b8b1 MX6QDL_PAD_EIM_D16__GPIO3_IO16 0x4001b8b1 diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi index 7d28696480508..5e9cbc8cdcbce 100644 --- a/arch/arm/boot/dts/lan966x.dtsi +++ b/arch/arm/boot/dts/lan966x.dtsi @@ -114,9 +114,9 @@ compatible = "atmel,at91sam9g46-aes"; reg = <0xe004c000 0x100>; interrupts = ; - dmas = <&dma0 AT91_XDMAC_DT_PERID(13)>, - <&dma0 AT91_XDMAC_DT_PERID(12)>; - dma-names = "rx", "tx"; + dmas = <&dma0 AT91_XDMAC_DT_PERID(12)>, + <&dma0 AT91_XDMAC_DT_PERID(13)>; + dma-names = "tx", "rx"; clocks = <&nic_clk>; clock-names = "aes_clk"; }; diff --git a/arch/arm/boot/dts/ox820.dtsi b/arch/arm/boot/dts/ox820.dtsi index 90846a7655b49..dde4364892bf0 100644 --- a/arch/arm/boot/dts/ox820.dtsi +++ b/arch/arm/boot/dts/ox820.dtsi @@ -287,7 +287,7 @@ clocks = <&armclk>; }; - gic: gic@1000 { + gic: interrupt-controller@1000 { compatible = "arm,arm11mp-gic"; interrupt-controller; #interrupt-cells = <3>; diff --git a/arch/arm/boot/dts/qcom-sdx65.dtsi b/arch/arm/boot/dts/qcom-sdx65.dtsi index 796641d30e06c..0c3f93603adc8 100644 --- a/arch/arm/boot/dts/qcom-sdx65.dtsi +++ b/arch/arm/boot/dts/qcom-sdx65.dtsi @@ -202,7 +202,7 @@ , ; - rpmhcc: clock-controller@1 { + rpmhcc: clock-controller { compatible = "qcom,sdx65-rpmh-clk"; #clock-cells = <1>; clock-names = "xo"; diff --git a/arch/arm/boot/dts/s5pv210-aries.dtsi b/arch/arm/boot/dts/s5pv210-aries.dtsi index c8f1c324a6c26..962266c24aac4 100644 --- a/arch/arm/boot/dts/s5pv210-aries.dtsi +++ b/arch/arm/boot/dts/s5pv210-aries.dtsi @@ -564,7 +564,6 @@ reset-gpios = <&mp05 5 GPIO_ACTIVE_LOW>; vdd3-supply = <&ldo7_reg>; vci-supply = <&ldo17_reg>; - spi-cs-high; spi-max-frequency = <1200000>; pinctrl-names = "default"; @@ -636,7 +635,7 @@ }; &i2s0 { - dmas = <&pdma0 9>, <&pdma0 10>, <&pdma0 11>; + dmas = <&pdma0 10>, <&pdma0 9>, <&pdma0 11>; status = "okay"; }; @@ -895,7 +894,7 @@ device-wakeup-gpios = <&gpg3 4 GPIO_ACTIVE_HIGH>; interrupt-parent = <&gph2>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; - interrupt-names = "host-wake"; + interrupt-names = "host-wakeup"; }; }; diff --git a/arch/arm/boot/dts/s5pv210.dtsi b/arch/arm/boot/dts/s5pv210.dtsi index 353ba7b09a0c0..c5265f3ae31d6 100644 --- a/arch/arm/boot/dts/s5pv210.dtsi +++ b/arch/arm/boot/dts/s5pv210.dtsi @@ -239,8 +239,8 @@ reg = <0xeee30000 0x1000>; interrupt-parent = <&vic2>; interrupts = <16>; - dma-names = "rx", "tx", "tx-sec"; - dmas = <&pdma1 9>, <&pdma1 10>, <&pdma1 11>; + dma-names = "tx", "rx", "tx-sec"; + dmas = <&pdma1 10>, <&pdma1 9>, <&pdma1 11>; clock-names = "iis", "i2s_opclk0", "i2s_opclk1"; @@ -259,8 +259,8 @@ reg = <0xe2100000 0x1000>; interrupt-parent = <&vic2>; interrupts = <17>; - dma-names = "rx", "tx"; - dmas = <&pdma1 12>, <&pdma1 13>; + dma-names = "tx", "rx"; + dmas = <&pdma1 13>, <&pdma1 12>; clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S1>, <&clocks SCLK_AUDIO1>; pinctrl-names = "default"; @@ -274,8 +274,8 @@ reg = <0xe2a00000 0x1000>; interrupt-parent = <&vic2>; interrupts = <18>; - dma-names = "rx", "tx"; - dmas = <&pdma1 14>, <&pdma1 15>; + dma-names = "tx", 
"rx"; + dmas = <&pdma1 15>, <&pdma1 14>; clock-names = "iis", "i2s_opclk0"; clocks = <&clocks CLK_I2S2>, <&clocks SCLK_AUDIO2>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi index f691c8f08d047..b632631296928 100644 --- a/arch/arm/boot/dts/sama7g5.dtsi +++ b/arch/arm/boot/dts/sama7g5.dtsi @@ -857,7 +857,6 @@ #interrupt-cells = <3>; #address-cells = <0>; interrupt-controller; - interrupt-parent; reg = <0xe8c11000 0x1000>, <0xe8c12000 0x2000>; }; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 7c1d6423d7f8c..b8c5dd7860cb2 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -46,7 +46,7 @@ <0xff113000 0x1000>; }; - intc: intc@fffed000 { + intc: interrupt-controller@fffed000 { compatible = "arm,cortex-a9-gic"; #interrupt-cells = <3>; interrupt-controller; diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index 3ba431dfa8c94..f1e50d2e623a3 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -38,7 +38,7 @@ <0xff113000 0x1000>; }; - intc: intc@ffffd000 { + intc: interrupt-controller@ffffd000 { compatible = "arm,cortex-a9-gic"; #interrupt-cells = <3>; interrupt-controller; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi index 61e17f44ce815..76c54b006d871 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-avenger96.dtsi @@ -141,6 +141,7 @@ compatible = "snps,dwmac-mdio"; reset-gpios = <&gpioz 2 GPIO_ACTIVE_LOW>; reset-delay-us = <1000>; + reset-post-delay-us = <1000>; phy0: ethernet-phy@7 { reg = <7>; diff --git a/arch/arm/boot/dts/suniv-f1c100s.dtsi b/arch/arm/boot/dts/suniv-f1c100s.dtsi index 6100d3b75f613..def8301014487 100644 --- a/arch/arm/boot/dts/suniv-f1c100s.dtsi +++ b/arch/arm/boot/dts/suniv-f1c100s.dtsi @@ -104,8 +104,10 @@ wdt: watchdog@1c20ca0 { compatible = "allwinner,suniv-f1c100s-wdt", - "allwinner,sun4i-a10-wdt"; + "allwinner,sun6i-a31-wdt"; reg = <0x01c20ca0 0x20>; + interrupts = <16>; + clocks = <&osc32k>; }; uart0: serial@1c25000 { diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index 413abfb42989e..f82a819eb0dbb 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -48,6 +48,7 @@ static inline u32 read_ ## a64(void) \ return read_sysreg(a32); \ } \ +CPUIF_MAP(ICC_EOIR1, ICC_EOIR1_EL1) CPUIF_MAP(ICC_PMR, ICC_PMR_EL1) CPUIF_MAP(ICC_AP0R0, ICC_AP0R0_EL1) CPUIF_MAP(ICC_AP0R1, ICC_AP0R1_EL1) @@ -63,12 +64,6 @@ CPUIF_MAP(ICC_AP1R3, ICC_AP1R3_EL1) /* Low-level accessors */ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg(irq, ICC_EOIR1); - isb(); -} - static inline void gic_write_dir(u32 val) { write_sysreg(val, ICC_DIR); diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h index 7c3b3671d6c25..6d1337c169cd3 100644 --- a/arch/arm/include/asm/timex.h +++ b/arch/arm/include/asm/timex.h @@ -11,5 +11,6 @@ typedef unsigned long cycles_t; #define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 
0 : c; }) +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 459abc5d18195..ea128e32e8ca8 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -708,6 +708,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x18); static_assert(offsetof(siginfo_t, si_pkey) == 0x14); static_assert(offsetof(siginfo_t, si_perf_data) == 0x10); static_assert(offsetof(siginfo_t, si_perf_type) == 0x14); +static_assert(offsetof(siginfo_t, si_perf_flags) == 0x18); static_assert(offsetof(siginfo_t, si_band) == 0x0c); static_assert(offsetof(siginfo_t, si_fd) == 0x10); static_assert(offsetof(siginfo_t, si_call_addr) == 0x0c); diff --git a/arch/arm/mach-ep93xx/clock.c b/arch/arm/mach-ep93xx/clock.c index 4fa6ea5461b79..85a496ddc6197 100644 --- a/arch/arm/mach-ep93xx/clock.c +++ b/arch/arm/mach-ep93xx/clock.c @@ -345,9 +345,10 @@ static struct clk_hw *clk_hw_register_ddiv(const char *name, psc->hw.init = &init; clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { kfree(psc); - + return ERR_CAST(clk); + } return &psc->hw; } @@ -452,9 +453,10 @@ static struct clk_hw *clk_hw_register_div(const char *name, psc->hw.init = &init; clk = clk_register(NULL, &psc->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { kfree(psc); - + return ERR_CAST(clk); + } return &psc->hw; } diff --git a/arch/arm/mach-hisi/platsmp.c b/arch/arm/mach-hisi/platsmp.c index a56cc64deeb8f..9ce93e0b6cdc3 100644 --- a/arch/arm/mach-hisi/platsmp.c +++ b/arch/arm/mach-hisi/platsmp.c @@ -67,14 +67,17 @@ static void __init hi3xxx_smp_prepare_cpus(unsigned int max_cpus) } ctrl_base = of_iomap(np, 0); if (!ctrl_base) { + of_node_put(np); pr_err("failed to map address\n"); return; } if (of_property_read_u32(np, "smp-offset", &offset) < 0) { + of_node_put(np); pr_err("failed to find smp-offset property\n"); return; } ctrl_base += offset; + of_node_put(np); } } @@ -160,6 +163,7 @@ static int hip01_boot_secondary(unsigned int cpu, struct task_struct *idle) if (WARN_ON(!node)) return -1; ctrl_base = of_iomap(node, 0); + of_node_put(node); /* set the secondary core boot from DDR */ remap_reg_value = readl_relaxed(ctrl_base + REG_SC_CTRL); diff --git a/arch/arm/mach-mediatek/Kconfig b/arch/arm/mach-mediatek/Kconfig index 9e0f592d87d8e..35a3430c7942d 100644 --- a/arch/arm/mach-mediatek/Kconfig +++ b/arch/arm/mach-mediatek/Kconfig @@ -30,6 +30,7 @@ config MACH_MT7623 config MACH_MT7629 bool "MediaTek MT7629 SoCs support" default ARCH_MEDIATEK + select HAVE_ARM_ARCH_TIMER config MACH_MT8127 bool "MediaTek MT8127 SoCs support" diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 9d4a0ab50a468..d63d5eb8d8fdf 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -41,7 +41,7 @@ static DEFINE_SPINLOCK(clockfw_lock); unsigned long omap1_uart_recalc(struct clk *clk) { unsigned int val = __raw_readl(clk->enable_reg); - return val & clk->enable_bit ? 48000000 : 12000000; + return val & 1 << clk->enable_bit ? 
48000000 : 12000000; } unsigned long omap1_sossi_recalc(struct clk *clk) diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c index 2e35354b61f56..167e871f059ef 100644 --- a/arch/arm/mach-pxa/cm-x300.c +++ b/arch/arm/mach-pxa/cm-x300.c @@ -354,13 +354,13 @@ static struct platform_device cm_x300_spi_gpio = { static struct gpiod_lookup_table cm_x300_spi_gpiod_table = { .dev_id = "spi_gpio", .table = { - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_SCL, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_SCL - GPIO_LCD_BASE, "sck", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DIN, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DIN - GPIO_LCD_BASE, "mosi", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_DOUT, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_DOUT - GPIO_LCD_BASE, "miso", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-pxa", GPIO_LCD_CS, + GPIO_LOOKUP("pca9555.1", GPIO_LCD_CS - GPIO_LCD_BASE, "cs", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index 200fd35168e05..fcced6499faee 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -681,7 +681,7 @@ static struct platform_device bq24022 = { static struct gpiod_lookup_table bq24022_gpiod_table = { .dev_id = "gpio-regulator", .table = { - GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2, + GPIO_LOOKUP("htc-egpio-0", EGPIO_MAGICIAN_BQ24022_ISET2 - MAGICIAN_EGPIO_BASE, NULL, GPIO_ACTIVE_HIGH), GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN, "enable", GPIO_ACTIVE_LOW), diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c index 431709725d02b..ded5e343e1984 100644 --- a/arch/arm/mach-pxa/tosa.c +++ b/arch/arm/mach-pxa/tosa.c @@ -296,9 +296,9 @@ static struct gpiod_lookup_table tosa_mci_gpio_table = { .table = { GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_nSD_DETECT, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_SD_WP, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_SD_WP - TOSA_SCOOP_GPIO_BASE, "wp", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("gpio-pxa", TOSA_GPIO_PWR_ON, + GPIO_LOOKUP("sharp-scoop.0", TOSA_GPIO_PWR_ON - TOSA_SCOOP_GPIO_BASE, "power", GPIO_ACTIVE_HIGH), { }, }, diff --git a/arch/arm/mach-vexpress/dcscb.c b/arch/arm/mach-vexpress/dcscb.c index a0554d7d04f7c..e1adc098f89ac 100644 --- a/arch/arm/mach-vexpress/dcscb.c +++ b/arch/arm/mach-vexpress/dcscb.c @@ -144,6 +144,7 @@ static int __init dcscb_init(void) if (!node) return -ENODEV; dcscb_base = of_iomap(node, 0); + of_node_put(node); if (!dcscb_base) return -EADDRNOTAVAIL; cfg = readl_relaxed(dcscb_base + DCS_CFG_R); diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index ac964612d8b07..90933eabe1156 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -464,3 +464,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 30b123cde02c5..aaeaf57c82225 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -253,6 +253,7 @@ config ARCH_INTEL_SOCFPGA config ARCH_SYNQUACER bool "Socionext SynQuacer SoC Family" + select IRQ_FASTEOI_HIERARCHY_HANDLERS config ARCH_TEGRA bool "NVIDIA Tegra SoC Family" diff --git a/arch/arm64/boot/dts/arm/juno-r1-scmi.dts b/arch/arm64/boot/dts/arm/juno-r1-scmi.dts index 190a0fba4ad68..fd1f0d26d751a 100644 --- a/arch/arm64/boot/dts/arm/juno-r1-scmi.dts +++ b/arch/arm64/boot/dts/arm/juno-r1-scmi.dts @@ -7,11 
+7,11 @@ }; etf@20140000 { - power-domains = <&scmi_devpd 0>; + power-domains = <&scmi_devpd 8>; }; funnel@20150000 { - power-domains = <&scmi_devpd 0>; + power-domains = <&scmi_devpd 8>; }; }; diff --git a/arch/arm64/boot/dts/arm/juno-r2-scmi.dts b/arch/arm64/boot/dts/arm/juno-r2-scmi.dts index dbf13770084f5..35e6d4762c463 100644 --- a/arch/arm64/boot/dts/arm/juno-r2-scmi.dts +++ b/arch/arm64/boot/dts/arm/juno-r2-scmi.dts @@ -7,11 +7,11 @@ }; etf@20140000 { - power-domains = <&scmi_devpd 0>; + power-domains = <&scmi_devpd 8>; }; funnel@20150000 { - power-domains = <&scmi_devpd 0>; + power-domains = <&scmi_devpd 8>; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts index c5eb3604dd5b7..119db6b541b7b 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-ultra.dts @@ -71,10 +71,6 @@ &spi0 { flash@0 { - spi-max-frequency = <108000000>; - spi-rx-bus-width = <4>; - spi-tx-bus-width = <4>; - partitions { compatible = "fixed-partitions"; #address-cells = <1>; @@ -112,7 +108,6 @@ &usb3 { usb-phy = <&usb3_phy>; - status = "disabled"; }; &mdio { diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 411feb2946134..bcecc74844535 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -679,7 +679,7 @@ assigned-clock-parents = <&clk26m>; #address-cells = <1>; #size-cells = <0>; - status = "disable"; + status = "disabled"; }; audsys: clock-controller@11210000 { diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi index 218a2b32200f8..4f0e51f1a3430 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi @@ -1366,8 +1366,9 @@ <&tegra_car TEGRA210_CLK_DFLL_REF>, <&tegra_car TEGRA210_CLK_I2C5>; clock-names = "soc", "ref", "i2c"; - resets = <&tegra_car TEGRA210_RST_DFLL_DVCO>; - reset-names = "dvco"; + resets = <&tegra_car TEGRA210_RST_DFLL_DVCO>, + <&tegra_car 155>; + reset-names = "dvco", "dfll"; #clock-cells = <0>; clock-output-names = "dfllCPU_out"; status = "disabled"; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index d80b1cefab100..8d4e0e1934396 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -13,7 +13,7 @@ clocks { sleep_clk: sleep_clk { compatible = "fixed-clock"; - clock-frequency = <32000>; + clock-frequency = <32768>; #clock-cells = <0>; }; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 8c1dc5155b713..b1e595cb4b901 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -183,8 +183,8 @@ no-map; }; - cont_splash_mem: memory@3800000 { - reg = <0 0x03800000 0 0x2400000>; + cont_splash_mem: memory@3401000 { + reg = <0 0x03401000 0 0x2200000>; no-map; }; @@ -498,7 +498,7 @@ #dma-cells = <1>; qcom,ee = <0>; qcom,controlled-remotely; - num-channels = <18>; + num-channels = <24>; qcom,num-ees = <4>; }; @@ -634,7 +634,7 @@ #dma-cells = <1>; qcom,ee = <0>; qcom,controlled-remotely; - num-channels = <18>; + num-channels = <24>; qcom,num-ees = <4>; }; diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts index 845eb7a6bf92e..0e63f707b9115 100644 --- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts +++ 
b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts @@ -29,7 +29,7 @@ }; /* Fixed crystal oscillator dedicated to MCP2518FD */ - clk40M: can_clock { + clk40M: can-clock { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <40000000>; diff --git a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi index dc17f2079695f..488caa48cba3a 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi @@ -677,7 +677,6 @@ ap_ec_spi: &spi10 { function = "gpio"; bias-disable; drive-strength = <2>; - output-high; }; fp_to_ap_irq_l: fp-to-ap-irq-l { @@ -691,7 +690,6 @@ ap_ec_spi: &spi10 { pins = "gpio68"; function = "gpio"; bias-disable; - output-low; }; gsc_ap_int_odl: gsc-ap-int-odl { @@ -741,7 +739,7 @@ ap_ec_spi: &spi10 { bias-pull-up; }; - sar1_irq_odl: sar0-irq-odl { + sar1_irq_odl: sar1-irq-odl { pins = "gpio140"; function = "gpio"; bias-pull-up; diff --git a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi index ecbf2b89d8963..5ab3696af3548 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-idp.dtsi @@ -400,10 +400,13 @@ &qup_uart7_cts { /* - * Configure a pull-down on CTS to match the pull of - * the Bluetooth module. + * Configure a bias-bus-hold on CTS to lower power + * usage when Bluetooth is turned off. Bus hold will + * maintain a low power state regardless of whether + * the Bluetooth module drives the pin in either + * direction or leaves the pin fully unpowered. */ - bias-pull-down; + bias-bus-hold; }; &qup_uart7_rts { @@ -495,10 +498,13 @@ pins = "gpio28"; function = "gpio"; /* - * Configure a pull-down on CTS to match the pull of - * the Bluetooth module. + * Configure a bias-bus-hold on CTS to lower power + * usage when Bluetooth is turned off. Bus hold will + * maintain a low power state regardless of whether + * the Bluetooth module drives the pin in either + * direction or leaves the pin fully unpowered. */ - bias-pull-down; + bias-bus-hold; }; qup_uart7_sleep_rts: qup-uart7-sleep-rts { diff --git a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi index b833ba1e8f4af..98b5cd70bca52 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-qcard.dtsi @@ -398,8 +398,14 @@ mos_bt_uart: &uart7 { /* For mos_bt_uart */ &qup_uart7_cts { - /* Configure a pull-down on CTS to match the pull of the Bluetooth module. */ - bias-pull-down; + /* + * Configure a bias-bus-hold on CTS to lower power + * usage when Bluetooth is turned off. Bus hold will + * maintain a low power state regardless of whether + * the Bluetooth module drives the pin in either + * direction or leaves the pin fully unpowered. + */ + bias-bus-hold; }; /* For mos_bt_uart */ @@ -490,10 +496,13 @@ mos_bt_uart: &uart7 { pins = "gpio28"; function = "gpio"; /* - * Configure a pull-down on CTS to match the pull of - * the Bluetooth module. + * Configure a bias-bus-hold on CTS to lower power + * usage when Bluetooth is turned off. Bus hold will + * maintain a low power state regardless of whether + * the Bluetooth module drives the pin in either + * direction or leaves the pin fully unpowered. 
*/ - bias-pull-down; + bias-bus-hold; }; /* For mos_bt_uart */ diff --git a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts index 367389526b418..a97f5e89e1d0f 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-xiaomi-beryllium.dts @@ -218,7 +218,7 @@ panel@0 { compatible = "tianma,fhd-video"; reg = <0>; - vddi0-supply = <&vreg_l14a_1p8>; + vddio-supply = <&vreg_l14a_1p8>; vddpos-supply = <&lab>; vddneg-supply = <&ibb>; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index 934e29b9e153b..e63b7b0458cf8 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -693,6 +693,9 @@ clock-names = "m-ahb", "s-ahb"; clocks = <&gcc GCC_QUPV3_WRAP_0_M_AHB_CLK>, <&gcc GCC_QUPV3_WRAP_0_S_AHB_CLK>; + iommus = <&apps_smmu 0x5a3 0x0>; + interconnects = <&clk_virt MASTER_QUP_CORE_0 0 &clk_virt SLAVE_QUP_CORE_0 0>; + interconnect-names = "qup-core"; #address-cells = <2>; #size-cells = <2>; ranges; @@ -718,6 +721,9 @@ clock-names = "m-ahb", "s-ahb"; clocks = <&gcc GCC_QUPV3_WRAP_1_M_AHB_CLK>, <&gcc GCC_QUPV3_WRAP_1_S_AHB_CLK>; + iommus = <&apps_smmu 0x43 0x0>; + interconnects = <&clk_virt MASTER_QUP_CORE_1 0 &clk_virt SLAVE_QUP_CORE_1 0>; + interconnect-names = "qup-core"; #address-cells = <2>; #size-cells = <2>; ranges; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 080457a68e3c7..88f26d89eea1a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1534,6 +1534,7 @@ reg = <0xf780 0x24>; clocks = <&sdhci>; clock-names = "emmcclk"; + drive-impedance-ohm = <50>; #phy-cells = <0>; status = "disabled"; }; @@ -1544,7 +1545,6 @@ clock-names = "refclk"; #phy-cells = <1>; resets = <&cru SRST_PCIEPHY>; - drive-impedance-ohm = <50>; reset-names = "phy"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi index 2bb5c9ff172c9..02d4285acbb8d 100644 --- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi @@ -10,7 +10,6 @@ compatible = "ti,am64-uart", "ti,am654-uart"; reg = <0x00 0x04a00000 0x00 0x100>; interrupts = ; - clock-frequency = <48000000>; current-speed = <115200>; power-domains = <&k3_pds 149 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 149 0>; @@ -21,7 +20,6 @@ compatible = "ti,am64-uart", "ti,am654-uart"; reg = <0x00 0x04a10000 0x00 0x100>; interrupts = ; - clock-frequency = <48000000>; current-speed = <115200>; power-domains = <&k3_pds 160 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 160 0>; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 50aa3d75ab4f4..f30af6e1fe401 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1029,6 +1029,7 @@ CONFIG_SM_GCC_8350=y CONFIG_SM_GCC_8450=y CONFIG_SM_GPUCC_8150=y CONFIG_SM_GPUCC_8250=y +CONFIG_SM_DISPCC_8250=y CONFIG_QCOM_HFPLL=y CONFIG_CLK_GFM_LPASS_SM8250=m CONFIG_CLK_RCAR_USB2_CLOCK_SEL=y diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 8bd5afc7b692e..48d4473e8eee2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -26,12 +26,6 @@ * sets the GP register's most significant bits to 0 with an explicit cast. 
*/ -static inline void gic_write_eoir(u32 irq) -{ - write_sysreg_s(irq, SYS_ICC_EOIR1_EL1); - isb(); -} - static __always_inline void gic_write_dir(u32 irq) { write_sysreg_s(irq, SYS_ICC_DIR_EL1); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dff2b483ea509..8e9c2c8376788 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -999,23 +999,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. */ -static inline bool arch_faults_on_old_pte(void) -{ - WARN_ON(preemptible()); - - return !cpu_has_hw_af(); -} -#define arch_faults_on_old_pte arch_faults_on_old_pte +#define arch_has_hw_pte_young cpu_has_hw_af /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. */ -static inline bool arch_wants_old_prefaulted_pte(void) -{ - return !arch_faults_on_old_pte(); -} -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +#define arch_wants_old_prefaulted_pte cpu_has_hw_af static inline bool pud_sect_supported(void) { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 73e38d9a540ce..6b1a12c23fe77 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -381,12 +381,10 @@ long get_tagged_addr_ctrl(struct task_struct *task); * of header definitions for the use of task_stack_page. */ -#define current_top_of_stack() \ -({ \ - struct stack_info _info; \ - BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \ - _info.high; \ -}) +/* + * The top of the current task's task stack + */ +#define current_top_of_stack() ((unsigned long)current->stack + THREAD_SIZE) #define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL)) #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 4e65da3445c7a..14c95844f6c84 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 451 +#define __NR_compat_syscalls 452 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 604a2053d0067..91f2bb7199af9 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -907,6 +907,8 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) __SYSCALL(__NR_futex_waitv, sys_futex_waitv) #define __NR_set_mempolicy_home_node 450 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +#define __NR_pmadv_ksm 451 +__SYSCALL(__NR_pmadv_ksm, sys_pmadv_ksm) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4a4122ef6f39b..41b5d9d3672ab 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -1011,6 +1011,7 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28); static_assert(offsetof(siginfo_t, si_pkey) == 0x20); static_assert(offsetof(siginfo_t, si_perf_data) == 0x18); static_assert(offsetof(siginfo_t, si_perf_type) == 0x20); +static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24); static_assert(offsetof(siginfo_t, si_band) == 0x10); static_assert(offsetof(siginfo_t, si_fd) == 0x18); 
static_assert(offsetof(siginfo_t, si_call_addr) == 0x10); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d984282b979f8..4700f8522d27b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -487,6 +487,7 @@ static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18); static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14); static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10); static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18); static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c); static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10); static_assert(offsetof(compat_siginfo_t, si_call_addr) == 0x0c); diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 12c6864e51e13..df14336c3a29c 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -113,6 +113,6 @@ long compat_arm_syscall(struct pt_regs *regs, int scno) addr = instruction_pointer(regs) - (compat_thumb_mode(regs) ? 2 : 4); arm64_notify_die("Oops - bad compat syscall(2)", regs, - SIGILL, ILL_ILLTRP, addr, scno); + SIGILL, ILL_ILLTRP, addr, 0); return 0; } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index b5447e53cd73e..0dea80bf6de46 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -16,8 +16,8 @@ void copy_highpage(struct page *to, struct page *from) { - struct page *kto = page_address(to); - struct page *kfrom = page_address(from); + void *kto = page_address(to); + void *kfrom = page_address(from); copy_page(kto, kfrom); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index fcc675aa1670d..c779e604edac1 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1261,6 +1261,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_jit_binary_free(header); prog->bpf_func = NULL; prog->jited = 0; + prog->jited_len = 0; goto out_off; } bpf_jit_binary_lock_ro(header); diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 42920f25e73c8..34ba684d5962b 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -30,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv) struct csky_insn_patch *param = priv; unsigned int addr = (unsigned int)param->addr; - if (atomic_inc_return(&param->cpu_count) == 1) { + if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) { *(u16 *) addr = cpu_to_le16(param->opcode); dcache_wb_range(addr, addr + 2); atomic_inc(&param->cpu_count); diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 869a3ac6bf23a..7ccc077a60bed 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -39,6 +39,7 @@ get_cycles (void) ret = ia64_getreg(_IA64_REG_AR_ITC); return ret; } +#define get_cycles get_cycles extern void ia64_cpu_local_tick (void); extern unsigned long long ia64_native_sched_clock (void); diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 78b1d03e86e1d..79ad5a5682b30 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -371,3 +371,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu index
16ea9a67723c0..3d5da25c73b5a 100644 --- a/arch/m68k/Kconfig.cpu +++ b/arch/m68k/Kconfig.cpu @@ -327,7 +327,7 @@ comment "Processor Specific Options" config M68KFPU_EMU bool "Math emulation support" - depends on MMU + depends on M68KCLASSIC && FPU help At some point in the future, this will cause floating-point math instructions to be emulated by the kernel on machines that lack a diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index eeab4f3e6c197..946853a08502e 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -335,6 +335,7 @@ comment "Machine Options" config UBOOT bool "Support for U-Boot command line parameters" + depends on COLDFIRE help If you say Y here kernel will try to collect command line parameters from the initial u-boot stack. diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index 87151d67d91e7..bce5ca56c3883 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -42,7 +42,8 @@ extern void paging_init(void); * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ -#define ZERO_PAGE(vaddr) (virt_to_page(0)) +extern void *empty_zero_page; +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) /* * All 32bit addresses are effectively valid for vmalloc... diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 80eb2396d01eb..3ba40bc1dfaa9 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -80,14 +80,14 @@ ({ u16 __v = le16_to_cpu(*(__force volatile u16 *) (addr)); __v; }) #define rom_out_8(addr, b) \ - ({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \ + (void)({u8 __maybe_unused __w, __v = (b); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u8 *) ((_addr | 0x10000) + (__v<<1)))); }) #define rom_out_be16(addr, w) \ - ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ + (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v & 0xFF)<<1)))); \ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v >> 8)<<1)))); }) #define rom_out_le16(addr, w) \ - ({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ + (void)({u16 __maybe_unused __w, __v = (w); u32 _addr = ((u32) (addr)); \ __w = ((*(__force volatile u16 *) ((_addr & 0xFFFF0000UL) + ((__v >> 8)<<1)))); \ __w = ((*(__force volatile u16 *) ((_addr | 0x10000) + ((__v & 0xFF)<<1)))); }) diff --git a/arch/m68k/include/asm/timex.h b/arch/m68k/include/asm/timex.h index 6a21d93582805..f4a7a340f4cae 100644 --- a/arch/m68k/include/asm/timex.h +++ b/arch/m68k/include/asm/timex.h @@ -35,7 +35,7 @@ static inline unsigned long random_get_entropy(void) { if (mach_random_get_entropy) return mach_random_get_entropy(); - return 0; + return random_get_entropy_fallback(); } #define random_get_entropy random_get_entropy diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 8f94feed969c4..07317367ead89 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -87,15 +87,8 @@ void (*mach_sched_init) (void) __initdata = NULL; void (*mach_init_IRQ) (void) __initdata = NULL; void (*mach_get_model) (char *model); void (*mach_get_hardware_list) (struct seq_file *m); -/* machine dependent timer functions */ -int (*mach_hwclk) (int, struct rtc_time*); -EXPORT_SYMBOL(mach_hwclk); unsigned int (*mach_get_ss)(void); -int (*mach_get_rtc_pll)(struct rtc_pll_info 
*); -int (*mach_set_rtc_pll)(struct rtc_pll_info *); EXPORT_SYMBOL(mach_get_ss); -EXPORT_SYMBOL(mach_get_rtc_pll); -EXPORT_SYMBOL(mach_set_rtc_pll); void (*mach_reset)( void ); void (*mach_halt)( void ); void (*mach_power_off)( void ); diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index 5e4104f07a443..19eea73d3c170 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -50,7 +50,6 @@ char __initdata command_line[COMMAND_LINE_SIZE]; /* machine dependent timer functions */ void (*mach_sched_init)(void) __initdata = NULL; -int (*mach_hwclk) (int, struct rtc_time*); /* machine dependent reboot functions */ void (*mach_reset)(void); diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 49533f65958a6..b9f6908a31bc3 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -625,6 +625,7 @@ static inline void siginfo_build_tests(void) /* _sigfault._perf */ BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10); BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x18); /* _sigpoll */ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index b1f3940bc2981..5ccf925567da0 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 340ffeea0a9dc..a97600b2af502 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -63,6 +63,15 @@ void timer_heartbeat(void) #endif /* CONFIG_HEARTBEAT */ #ifdef CONFIG_M68KCLASSIC +/* machine dependent timer functions */ +int (*mach_hwclk) (int, struct rtc_time*); +EXPORT_SYMBOL(mach_hwclk); + +int (*mach_get_rtc_pll)(struct rtc_pll_info *); +int (*mach_set_rtc_pll)(struct rtc_pll_info *); +EXPORT_SYMBOL(mach_get_rtc_pll); +EXPORT_SYMBOL(mach_set_rtc_pll); + #if !IS_BUILTIN(CONFIG_RTC_DRV_GENERIC) void read_persistent_clock64(struct timespec64 *ts) { diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 820145e473501..6b76208597f3c 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h index c8385c4e8664a..568fe09332eb4 100644 --- a/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip27/cpu-feature-overrides.h @@ -25,7 +25,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 0 #define cpu_has_4k_cache 1 -#define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h index 8ad0c424a9afb..ce4e4c6e09e24 100644 --- a/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ip30/cpu-feature-overrides.h @@ -28,7 +28,6 @@ #define cpu_has_4kex 1 #define cpu_has_3k_cache 
0 #define cpu_has_4k_cache 1 -#define cpu_has_fpu 1 #define cpu_has_nofpuex 0 #define cpu_has_32fpr 1 #define cpu_has_counter 1 diff --git a/arch/mips/include/asm/mach-ralink/spaces.h b/arch/mips/include/asm/mach-ralink/spaces.h index f7af11ea2d612..a9f0570d0f044 100644 --- a/arch/mips/include/asm/mach-ralink/spaces.h +++ b/arch/mips/include/asm/mach-ralink/spaces.h @@ -6,7 +6,9 @@ #define PCI_IOSIZE SZ_64K #define IO_SPACE_LIMIT (PCI_IOSIZE - 1) +#ifdef CONFIG_PCI_DRIVERS_GENERIC #define pci_remap_iospace pci_remap_iospace +#endif #include #endif diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h index 8026baf46e729..2e107886f97ac 100644 --- a/arch/mips/include/asm/timex.h +++ b/arch/mips/include/asm/timex.h @@ -76,25 +76,24 @@ static inline cycles_t get_cycles(void) else return 0; /* no usable counter */ } +#define get_cycles get_cycles /* * Like get_cycles - but where c0_count is not available we desperately * use c0_random in an attempt to get at least a little bit of entropy. - * - * R6000 and R6000A neither have a count register nor a random register. - * That leaves no entropy source in the CPU itself. */ static inline unsigned long random_get_entropy(void) { - unsigned int prid = read_c0_prid(); - unsigned int imp = prid & PRID_IMP_MASK; + unsigned int c0_random; - if (can_use_mips_counter(prid)) + if (can_use_mips_counter(read_c0_prid())) return read_c0_count(); - else if (likely(imp != PRID_IMP_R6000 && imp != PRID_IMP_R6000A)) - return read_c0_random(); + + if (cpu_has_3kex) + c0_random = (read_c0_random() >> 8) & 0x3f; else - return 0; /* no usable register */ + c0_random = read_c0_random() & 0x3f; + return (random_get_entropy_fallback() << 6) | (0x3f - c0_random); } #define random_get_entropy random_get_entropy diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 17aff13cd7ce6..3e386f7e15450 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -28,6 +28,7 @@ phys_addr_t __weak mips_cpc_default_phys_base(void) cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc"); if (cpc_node) { err = of_address_to_resource(cpc_node, 0, &res); + of_node_put(cpc_node); if (!err) return res.start; } diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 253ff994ed2ec..e4aeedb17c383 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -389,3 +389,4 @@ 448 n32 process_mrelease sys_process_mrelease 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 n32 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 3f1886ad9d806..fe88db51efa00 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -365,3 +365,4 @@ 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 n64 pmadv_ksm sys_pmadv_ksm diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 8f243e35a7b20..674cb940bd153 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -438,3 +438,4 @@ 448 o32 process_mrelease sys_process_mrelease 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node +451 o32 pmadv_ksm sys_pmadv_ksm diff --git 
a/arch/nios2/include/asm/timex.h b/arch/nios2/include/asm/timex.h index a769f871b28d9..40a1adc9bd03e 100644 --- a/arch/nios2/include/asm/timex.h +++ b/arch/nios2/include/asm/timex.h @@ -8,5 +8,8 @@ typedef unsigned long cycles_t; extern cycles_t get_cycles(void); +#define get_cycles get_cycles + +#define random_get_entropy() (((unsigned long)get_cycles()) ?: random_get_entropy_fallback()) #endif diff --git a/arch/openrisc/include/asm/timex.h b/arch/openrisc/include/asm/timex.h index d52b4e536e3f9..5487fa93dd9be 100644 --- a/arch/openrisc/include/asm/timex.h +++ b/arch/openrisc/include/asm/timex.h @@ -23,6 +23,7 @@ static inline cycles_t get_cycles(void) { return mfspr(SPR_TTCR); } +#define get_cycles get_cycles /* This isn't really used any more */ #define CLOCK_TICK_RATE 1000 diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 15f1b38dfe03b..871f4c8588595 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -521,6 +521,15 @@ _start: l.ori r3,r0,0x1 l.mtspr r0,r3,SPR_SR + /* + * Start the TTCR as early as possible, so that the RNG can make use of + * measurements of boot time from the earliest opportunity. Especially + * important is that the TTCR does not return zero by the time we reach + * rand_initialize(). + */ + l.movhi r3,hi(SPR_TTMR_CR) + l.mtspr r0,r3,SPR_TTMR + CLEAR_GPR(r1) CLEAR_GPR(r2) CLEAR_GPR(r3) diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h index c4cd6360f9964..d63a2acb91f2b 100644 --- a/arch/parisc/include/asm/fb.h +++ b/arch/parisc/include/asm/fb.h @@ -12,9 +12,13 @@ static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; } +#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI) +int fb_is_primary_device(struct fb_info *info); +#else static inline int fb_is_primary_device(struct fb_info *info) { return 0; } +#endif #endif /* _ASM_FB_H_ */ diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h index 06b510f8172e3..b4622cb06a75e 100644 --- a/arch/parisc/include/asm/timex.h +++ b/arch/parisc/include/asm/timex.h @@ -13,9 +13,10 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles (void) +static inline cycles_t get_cycles(void) { return mfctl(16); } +#define get_cycles get_cycles #endif diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 26eb568f8b961..dddaaa6e7a825 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -327,8 +327,6 @@ int init_per_cpu(int cpunum) set_firmware_width(); ret = pdc_coproc_cfg(&coproc_cfg); - store_cpu_topology(cpunum); - if(ret >= 0 && coproc_cfg.ccr_functional) { mtctl(coproc_cfg.ccr_functional, 10); /* 10 == Coprocessor Control Reg */ diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 68b46fe2f17c5..e19e8deb4c322 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -448,3 +448,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c index 9696e3cb6a2a6..b9d845e314f8b 100644 --- a/arch/parisc/kernel/topology.c +++ b/arch/parisc/kernel/topology.c @@ -20,8 +20,6 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); -static int dualcores_found; - /* * store_cpu_topology is called at boot when only 
one cpu is running * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, @@ -60,7 +58,6 @@ void store_cpu_topology(unsigned int cpuid) if (p->cpu_loc) { cpuid_topo->core_id++; cpuid_topo->package_id = cpu_topology[cpu].package_id; - dualcores_found = 1; continue; } } @@ -80,22 +77,11 @@ void store_cpu_topology(unsigned int cpuid) cpu_topology[cpuid].package_id); } -static struct sched_domain_topology_level parisc_mc_topology[] = { -#ifdef CONFIG_SCHED_MC - { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, -#endif - - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - /* * init_cpu_topology is called at boot when only one cpu is running * which prevent simultaneous write access to cpu_topology array */ void __init init_cpu_topology(void) { - /* Set scheduler topology descriptor */ - if (dualcores_found) - set_sched_topology(parisc_mc_topology); + reset_cpu_topology(); } diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 174edabb74fa1..500f2a0831ef8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -218,7 +218,6 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IOREMAP_PROT - select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE @@ -771,7 +770,6 @@ config THREAD_SHIFT range 13 15 default "15" if PPC_256K_PAGES default "14" if PPC64 - default "14" if KASAN default "13" help Used to define the stack size. The default is almost always what you diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f2c5c26869f1a..03ae544eb6cc4 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -216,6 +216,9 @@ static inline bool pfn_valid(unsigned long pfn) #define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET) #else #ifdef CONFIG_PPC64 + +#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x)) + /* * gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET * with -mcmodel=medium, so we use & and | instead of - and + on 64-bit. 
@@ -223,13 +226,13 @@ static inline bool pfn_valid(unsigned long pfn) */ #define __va(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) >= PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \ }) #define __pa(x) \ ({ \ - VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET); \ + VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \ (unsigned long)(x) & 0x0fffffffffffffffUL; \ }) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 125328d1b9802..af58f1ed3952e 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -14,10 +14,16 @@ #ifdef __KERNEL__ -#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT +#ifdef CONFIG_KASAN +#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1) +#else +#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT +#endif + +#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT #define THREAD_SHIFT PAGE_SHIFT #else -#define THREAD_SHIFT CONFIG_THREAD_SHIFT +#define THREAD_SHIFT MIN_THREAD_SHIFT #endif #define THREAD_SIZE (1 << THREAD_SHIFT) diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index fa2e76e4093a3..14b4489de52c5 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -19,6 +19,7 @@ static inline cycles_t get_cycles(void) { return mftb(); } +#define get_cycles get_cycles #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TIMEX_H */ diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index 83afcb6c194b5..c36f71e01c0f0 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -126,7 +126,7 @@ static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref) * Receive window attributes specified by the (in-kernel) owner of window. */ struct vas_rx_win_attr { - void *rx_fifo; + u64 rx_fifo; int rx_fifo_size; int wcreds_max; diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 9581906b5ee9c..da18f83ef8834 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -330,22 +330,22 @@ _GLOBAL(enter_rtas) clrldi r4,r4,2 /* convert to realmode address */ mtlr r4 - li r0,0 - ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI - andc r0,r6,r0 - - li r9,1 - rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG) - ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE - andc r6,r0,r9 - __enter_rtas: - sync /* disable interrupts so SRR0/1 */ - mtmsrd r0 /* don't get trashed */ - LOAD_REG_ADDR(r4, rtas) ld r5,RTASENTRY(r4) /* get the rtas->entry value */ ld r4,RTASBASE(r4) /* get the rtas->base value */ + + /* + * RTAS runs in 32-bit big endian real mode, but leave MSR[RI] on as we + * may hit NMI (SRESET or MCE) while in RTAS. RTAS should disable RI in + * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S] + * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if + * MSR[S] is set, it will remain when entering RTAS. 
+ */ + LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI) + + li r0,0 + mtmsrd r0,1 /* disable RI before using SRR0/1 */ mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 65562c4a0a690..dc2350b288cfe 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -867,7 +867,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info) sizeof(struct fadump_memory_range)); return 0; } - static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, u64 base, u64 end) { @@ -886,7 +885,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, start = mem_ranges[mrange_info->mem_range_cnt - 1].base; size = mem_ranges[mrange_info->mem_range_cnt - 1].size; - if ((start + size) == base) + /* + * Boot memory area needs separate PT_LOAD segment(s) as it + * is moved to a different location at the time of crash. + * So, fold only if the region is not boot memory area. + */ + if ((start + size) == base && start >= fw_dump.boot_mem_top) is_adjacent = true; } if (!is_adjacent) { diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 4ad79eb638c62..77cd4c5a2d631 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -37,7 +37,7 @@ static int __init powersave_off(char *arg) { ppc_md.power_save = NULL; cpuidle_disable = IDLE_POWERSAVE_OFF; - return 0; + return 1; } __setup("powersave=off", powersave_off); diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c index 5dca19361316e..09c49632bfe59 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-fpu.c +++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c @@ -17,9 +17,13 @@ int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data) #ifdef CONFIG_PPC_FPU_REGS flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); - else + if (fpidx < (PT_FPSCR - PT_FPR0)) { + if (IS_ENABLED(CONFIG_PPC32)) + // On 32-bit the index we are passed refers to 32-bit words + *data = ((u32 *)child->thread.fp_state.fpr)[fpidx]; + else + memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long)); + } else *data = child->thread.fp_state.fpscr; #else *data = 0; @@ -39,9 +43,13 @@ int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data) #ifdef CONFIG_PPC_FPU_REGS flush_fp_to_thread(child); - if (fpidx < (PT_FPSCR - PT_FPR0)) - memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); - else + if (fpidx < (PT_FPSCR - PT_FPR0)) { + if (IS_ENABLED(CONFIG_PPC32)) + // On 32-bit the index we are passed refers to 32-bit words + ((u32 *)child->thread.fp_state.fpr)[fpidx] = data; + else + memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long)); + } else child->thread.fp_state.fpscr = data; #endif diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 6d5026a9db4fc..eab6fa59d9d2d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -450,4 +450,7 @@ void __init pt_regs_check(void) #else BUILD_BUG_ON(IS_ENABLED(CONFIG_HAVE_FUNCTION_DESCRIPTORS)); #endif + + // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible + BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX)); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1f42aabbbab3a..6bc89d9ccf635 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -49,6 +49,15 @@ void enter_rtas(unsigned long); 
static inline void do_enter_rtas(unsigned long args) { + unsigned long msr; + + /* + * Make sure MSR[RI] is currently enabled as it will be forced later + * in enter_rtas. + */ + msr = mfmsr(); + BUG_ON(!(msr & MSR_RI)); + enter_rtas(args); srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2600b4237292c..bb2f71a369415 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -530,3 +530,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 6fa518f6501d5..aef0a6b423d80 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4233,13 +4233,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) start_wait = ktime_get(); vc->vcore_state = VCORE_SLEEPING; - trace_kvmppc_vcore_blocked(vc, 0); + trace_kvmppc_vcore_blocked(vc->runner, 0); spin_unlock(&vc->lock); schedule(); finish_rcuwait(&vc->wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; - trace_kvmppc_vcore_blocked(vc, 1); + trace_kvmppc_vcore_blocked(vc->runner, 1); ++vc->runner->stat.halt_successful_wait; cur = ktime_get(); @@ -4619,9 +4619,9 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (kvmppc_vcpu_check_block(vcpu)) break; - trace_kvmppc_vcore_blocked(vc, 0); + trace_kvmppc_vcore_blocked(vcpu, 0); schedule(); - trace_kvmppc_vcore_blocked(vc, 1); + trace_kvmppc_vcore_blocked(vcpu, 1); } finish_rcuwait(wait); } @@ -5283,6 +5283,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR); lpcr &= LPCR_PECE | LPCR_LPES; } else { + /* + * The L2 LPES mode will be set by the L0 according to whether + * or not it needs to take external interrupts in HV mode. 
+ */ lpcr = 0; } lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE | diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index c943a051c6e70..265bb30a0af2b 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -261,8 +261,7 @@ static void load_l2_hv_regs(struct kvm_vcpu *vcpu, /* * Don't let L1 change LPCR bits for the L2 except these: */ - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | - LPCR_LPES | LPCR_MER; + mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD | LPCR_MER; /* * Additional filtering is required depending on hardware diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c index 45c993dd05f5e..36f2314c58e5f 100644 --- a/arch/powerpc/kvm/book3s_hv_uvmem.c +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c @@ -361,13 +361,15 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm, static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot, struct kvm *kvm, unsigned long *gfn) { - struct kvmppc_uvmem_slot *p; + struct kvmppc_uvmem_slot *p = NULL, *iter; bool ret = false; unsigned long i; - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) - if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns) + list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) + if (*gfn >= iter->base_pfn && *gfn < iter->base_pfn + iter->nr_pfns) { + p = iter; break; + } if (!p) return ret; /* diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 38cd0ed0a6178..32e2cb5811cc8 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -409,9 +409,9 @@ TRACE_EVENT(kvmppc_run_core, ); TRACE_EVENT(kvmppc_vcore_blocked, - TP_PROTO(struct kvmppc_vcore *vc, int where), + TP_PROTO(struct kvm_vcpu *vcpu, int where), - TP_ARGS(vc, where), + TP_ARGS(vcpu, where), TP_STRUCT__entry( __field(int, n_runnable) @@ -421,8 +421,8 @@ TRACE_EVENT(kvmppc_vcore_blocked, ), TP_fast_assign( - __entry->runner_vcpu = vc->runner->vcpu_id; - __entry->n_runnable = vc->n_runnable; + __entry->runner_vcpu = vcpu->vcpu_id; + __entry->n_runnable = vcpu->arch.vcore->n_runnable; __entry->where = where; __entry->tgid = current->tgid; ), diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/fsl_book3e.c index dfe715e0f70ac..388f7c7dabd30 100644 --- a/arch/powerpc/mm/nohash/fsl_book3e.c +++ b/arch/powerpc/mm/nohash/fsl_book3e.c @@ -287,22 +287,19 @@ void __init adjust_total_lowmem(void) #ifdef CONFIG_STRICT_KERNEL_RWX void mmu_mark_rodata_ro(void) -{ - /* Everything is done in mmu_mark_initmem_nx() */ -} -#endif - -void mmu_mark_initmem_nx(void) { unsigned long remapped; - if (!strict_kernel_rwx_enabled()) - return; - remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false); WARN_ON(__max_low_memory != remapped); } +#endif + +void mmu_mark_initmem_nx(void) +{ + /* Everything is done in mmu_mark_rodata_ro() */ +} void setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index a74d382ecbb77..bb5d64862bc99 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -108,7 +108,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) *mmcra |= MMCRA_SDAR_MODE_TLB; } -static u64 p10_thresh_cmp_val(u64 value) +static int p10_thresh_cmp_val(u64 value) { int exp = 0; u64 result = value; @@ -139,7 +139,7 @@ static u64 p10_thresh_cmp_val(u64 value) * 
exponent is also zero. */ if (!(value & 0xC0) && exp) - result = 0; + result = -1; else result = (exp << 8) | value; } @@ -187,7 +187,7 @@ static bool is_thresh_cmp_valid(u64 event) unsigned int cmp, exp; if (cpu_has_feature(CPU_FTR_ARCH_31)) - return p10_thresh_cmp_val(event) != 0; + return p10_thresh_cmp_val(event) >= 0; /* * Check the mantissa upper two bits are not zero, unless the @@ -502,12 +502,14 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT); mask |= p10_CNST_THRESH_CMP_MASK; value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1)); - } + } else if (event_is_threshold(event)) + return -1; } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (event_is_threshold(event) && is_thresh_cmp_valid(event)) { mask |= CNST_THRESH_MASK; value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT); - } + } else if (event_is_threshold(event)) + return -1; } else { /* * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC, diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index 2571841625a23..1d3bc35ee1a7d 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -327,6 +327,6 @@ late_initcall(cpm_init); static int __init cpm_powersave_off(char *arg) { cpm.powersave_off = 1; - return 0; + return 1; } __setup("powersave=off", cpm_powersave_off); diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index c58b6f1c40e35..3ef5e9fd3a9b6 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -280,6 +280,7 @@ cpm_setbrg(uint brg, uint rate) out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) | CPM_BRG_EN | CPM_BRG_DIV16); } +EXPORT_SYMBOL(cpm_setbrg); struct cpm_ioport16 { __be16 dir, par, odr_sor, dat, intr; diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c index c8ad057c72210..9d74d3950a523 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.c +++ b/arch/powerpc/platforms/powernv/opal-fadump.c @@ -60,7 +60,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) addr = be64_to_cpu(addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = (void *)addr; - if (opal_fdm_active->registered_regions == 0) + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) return; ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr); @@ -95,17 +95,17 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf); static void opal_fadump_update_config(struct fw_dump *fadump_conf, const struct opal_fadump_mem_struct *fdm) { - pr_debug("Boot memory regions count: %d\n", fdm->region_cnt); + pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt)); /* * The destination address of the first boot memory region is the * destination address of boot memory regions. 
*/ - fadump_conf->boot_mem_dest_addr = fdm->rgn[0].dest; + fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest); pr_debug("Destination address of boot memory regions: %#016llx\n", fadump_conf->boot_mem_dest_addr); - fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr; + fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr); } /* @@ -126,9 +126,9 @@ static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, fadump_conf->boot_memory_size = 0; pr_debug("Boot memory regions:\n"); - for (i = 0; i < fdm->region_cnt; i++) { - base = fdm->rgn[i].src; - size = fdm->rgn[i].size; + for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) { + base = be64_to_cpu(fdm->rgn[i].src); + size = be64_to_cpu(fdm->rgn[i].size); pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size); fadump_conf->boot_mem_addr[i] = base; @@ -143,7 +143,7 @@ static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, * Start address of reserve dump area (permanent reservation) for * re-registering FADump after dump capture. */ - fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest; + fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest); /* * Rarely, but it can so happen that system crashes before all @@ -155,13 +155,14 @@ static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, * Hope the memory that could not be preserved only has pages * that are usually filtered out while saving the vmcore. */ - if (fdm->region_cnt > fdm->registered_regions) { + if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) { pr_warn("Not all memory regions were saved!!!\n"); pr_warn(" Unsaved memory regions:\n"); - i = fdm->registered_regions; - while (i < fdm->region_cnt) { + i = be16_to_cpu(fdm->registered_regions); + while (i < be16_to_cpu(fdm->region_cnt)) { pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n", - i, fdm->rgn[i].src, fdm->rgn[i].size); + i, be64_to_cpu(fdm->rgn[i].src), + be64_to_cpu(fdm->rgn[i].size)); i++; } @@ -170,7 +171,7 @@ static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, } fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size); - fadump_conf->boot_mem_regs_cnt = fdm->region_cnt; + fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt); opal_fadump_update_config(fadump_conf, fdm); } @@ -178,35 +179,38 @@ static void __init opal_fadump_get_config(struct fw_dump *fadump_conf, static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm) { fdm->version = OPAL_FADUMP_VERSION; - fdm->region_cnt = 0; - fdm->registered_regions = 0; - fdm->fadumphdr_addr = 0; + fdm->region_cnt = cpu_to_be16(0); + fdm->registered_regions = cpu_to_be16(0); + fdm->fadumphdr_addr = cpu_to_be64(0); } static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf) { u64 addr = fadump_conf->reserve_dump_area_start; + u16 reg_cnt; int i; opal_fdm = __va(fadump_conf->kernel_metadata); opal_fadump_init_metadata(opal_fdm); /* Boot memory regions */ + reg_cnt = be16_to_cpu(opal_fdm->region_cnt); for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) { - opal_fdm->rgn[i].src = fadump_conf->boot_mem_addr[i]; - opal_fdm->rgn[i].dest = addr; - opal_fdm->rgn[i].size = fadump_conf->boot_mem_sz[i]; + opal_fdm->rgn[i].src = cpu_to_be64(fadump_conf->boot_mem_addr[i]); + opal_fdm->rgn[i].dest = cpu_to_be64(addr); + opal_fdm->rgn[i].size = cpu_to_be64(fadump_conf->boot_mem_sz[i]); - opal_fdm->region_cnt++; + reg_cnt++; addr += fadump_conf->boot_mem_sz[i]; } + opal_fdm->region_cnt = cpu_to_be16(reg_cnt); /* * Kernel metadata 
is passed to f/w and retrieved in capture kerenl. * So, use it to save fadump header address instead of calculating it. */ - opal_fdm->fadumphdr_addr = (opal_fdm->rgn[0].dest + - fadump_conf->boot_memory_size); + opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) + + fadump_conf->boot_memory_size); opal_fadump_update_config(fadump_conf, opal_fdm); @@ -269,18 +273,21 @@ static u64 opal_fadump_get_bootmem_min(void) static int opal_fadump_register(struct fw_dump *fadump_conf) { s64 rc = OPAL_PARAMETER; + u16 registered_regs; int i, err = -EIO; - for (i = 0; i < opal_fdm->region_cnt; i++) { + registered_regs = be16_to_cpu(opal_fdm->registered_regions); + for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) { rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE, - opal_fdm->rgn[i].src, - opal_fdm->rgn[i].dest, - opal_fdm->rgn[i].size); + be64_to_cpu(opal_fdm->rgn[i].src), + be64_to_cpu(opal_fdm->rgn[i].dest), + be64_to_cpu(opal_fdm->rgn[i].size)); if (rc != OPAL_SUCCESS) break; - opal_fdm->registered_regions++; + registered_regs++; } + opal_fdm->registered_regions = cpu_to_be16(registered_regs); switch (rc) { case OPAL_SUCCESS: @@ -291,7 +298,8 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) case OPAL_RESOURCE: /* If MAX regions limit in f/w is hit, warn and proceed. */ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n", - (opal_fdm->region_cnt - opal_fdm->registered_regions)); + (be16_to_cpu(opal_fdm->region_cnt) - + be16_to_cpu(opal_fdm->registered_regions))); fadump_conf->dump_registered = 1; err = 0; break; @@ -312,7 +320,7 @@ static int opal_fadump_register(struct fw_dump *fadump_conf) * If some regions were registered before OPAL_MPIPL_ADD_RANGE * OPAL call failed, unregister all regions. */ - if ((err < 0) && (opal_fdm->registered_regions > 0)) + if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0)) opal_fadump_unregister(fadump_conf); return err; @@ -328,7 +336,7 @@ static int opal_fadump_unregister(struct fw_dump *fadump_conf) return -EIO; } - opal_fdm->registered_regions = 0; + opal_fdm->registered_regions = cpu_to_be16(0); fadump_conf->dump_registered = 0; return 0; } @@ -563,19 +571,20 @@ static void opal_fadump_region_show(struct fw_dump *fadump_conf, else fdm_ptr = opal_fdm; - for (i = 0; i < fdm_ptr->region_cnt; i++) { + for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) { /* * Only regions that are registered for MPIPL * would have dump data. */ if ((fadump_conf->dump_active) && - (i < fdm_ptr->registered_regions)) - dumped_bytes = fdm_ptr->rgn[i].size; + (i < be16_to_cpu(fdm_ptr->registered_regions))) + dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size); seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ", - fdm_ptr->rgn[i].src, fdm_ptr->rgn[i].dest); + be64_to_cpu(fdm_ptr->rgn[i].src), + be64_to_cpu(fdm_ptr->rgn[i].dest)); seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n", - fdm_ptr->rgn[i].size, dumped_bytes); + be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes); } /* Dump is active. Show reserved area start address. 
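Note: the opal-fadump hunks in this region all apply one rule: the fadump metadata is shared with OPAL firmware in big-endian layout (see the __be16/__be64 fields in the opal-fadump.h hunk below), so every load goes through be16_to_cpu()/be64_to_cpu() and every store through cpu_to_be16()/cpu_to_be64(), whatever the kernel's own byte order. A small userspace sketch of that discipline, with glibc's <endian.h> helpers standing in for the kernel ones:

#include <stdio.h>
#include <stdint.h>
#include <endian.h>   /* be64toh()/htobe64(); glibc naming assumed */

/* Fields live in big-endian "wire" order, as if shared with firmware. */
struct region {
    uint64_t src;   /* big-endian in memory */
    uint64_t size;  /* big-endian in memory */
};

static void region_set(struct region *r, uint64_t src, uint64_t size)
{
    r->src  = htobe64(src);    /* convert on every store */
    r->size = htobe64(size);
}

static void region_print(const struct region *r)
{
    /* convert on every load; never use the raw field directly */
    printf("src=%#llx size=%#llx\n",
           (unsigned long long)be64toh(r->src),
           (unsigned long long)be64toh(r->size));
}

int main(void)
{
    struct region r;

    region_set(&r, 0x1000, 0x200000);
    region_print(&r);
    return 0;
}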
*/ @@ -624,6 +633,7 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { const __be32 *prop; unsigned long dn; + __be64 be_addr; u64 addr = 0; int i, len; s64 ret; @@ -680,13 +690,13 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) if (!prop) return; - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr); - if ((ret != OPAL_SUCCESS) || !addr) { + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr); + if ((ret != OPAL_SUCCESS) || !be_addr) { pr_err("Failed to get Kernel metadata (%lld)\n", ret); return; } - addr = be64_to_cpu(addr); + addr = be64_to_cpu(be_addr); pr_debug("Kernel metadata addr: %llx\n", addr); opal_fdm_active = __va(addr); @@ -697,14 +707,14 @@ void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) } /* Kernel regions not registered with f/w for MPIPL */ - if (opal_fdm_active->registered_regions == 0) { + if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) { opal_fdm_active = NULL; return; } - ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr); - if (addr) { - addr = be64_to_cpu(addr); + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr); + if (be_addr) { + addr = be64_to_cpu(be_addr); pr_debug("CPU metadata addr: %llx\n", addr); opal_cpu_metadata = __va(addr); } diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h index f1e9ecf548c5d..3f715efb0aa6e 100644 --- a/arch/powerpc/platforms/powernv/opal-fadump.h +++ b/arch/powerpc/platforms/powernv/opal-fadump.h @@ -31,14 +31,14 @@ * OPAL FADump kernel metadata * * The address of this structure will be registered with f/w for retrieving - * and processing during crash dump. + * in the capture kernel to process the crash dump. */ struct opal_fadump_mem_struct { u8 version; u8 reserved[3]; - u16 region_cnt; /* number of regions */ - u16 registered_regions; /* Regions registered for MPIPL */ - u64 fadumphdr_addr; + __be16 region_cnt; /* number of regions */ + __be16 registered_regions; /* Regions registered for MPIPL */ + __be64 fadumphdr_addr; struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS]; } __packed; @@ -135,7 +135,7 @@ static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt, for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) { reg_entry = (struct hdat_fadump_reg_entry *)bufp; val = (cpu_endian ? 
be64_to_cpu(reg_entry->reg_val) : - reg_entry->reg_val); + (u64)(reg_entry->reg_val)); opal_fadump_set_regval_regnum(regs, be32_to_cpu(reg_entry->reg_type), be32_to_cpu(reg_entry->reg_num), diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 105d889abd51a..824c3ad7a0faf 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -96,6 +96,15 @@ static void __init init_fw_feat_flags(struct device_node *np) if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); + + if (fw_feature_is("enabled", "no-need-l1d-flush-msr-pr-1-to-0", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (fw_feature_is("enabled", "no-need-l1d-flush-kernel-on-user-access", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + + if (fw_feature_is("enabled", "no-need-store-drain-on-priv-state-switch", np)) + security_ftr_clear(SEC_FTR_STF_BARRIER); } static void __init pnv_setup_security_mitigations(void) diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c index e4a00ad06f9d3..67c8c4b2d8b17 100644 --- a/arch/powerpc/platforms/powernv/ultravisor.c +++ b/arch/powerpc/platforms/powernv/ultravisor.c @@ -55,6 +55,7 @@ static int __init uv_init(void) return -ENODEV; uv_memcons = memcons_init(node, "memcons"); + of_node_put(node); if (!uv_memcons) return -ENOENT; diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c index a7aabc18039eb..c1bfad56447d4 100644 --- a/arch/powerpc/platforms/powernv/vas-fault.c +++ b/arch/powerpc/platforms/powernv/vas-fault.c @@ -216,7 +216,7 @@ int vas_setup_fault_window(struct vas_instance *vinst) vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT); attr.rx_fifo_size = vinst->fault_fifo_size; - attr.rx_fifo = vinst->fault_fifo; + attr.rx_fifo = __pa(vinst->fault_fifo); /* * Max creds is based on number of CRBs can fit in the FIFO. diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index 0f8d39fbf2b21..0072682531d80 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -404,7 +404,7 @@ static void init_winctx_regs(struct pnv_vas_window *window, * * See also: Design note in function header. */ - val = __pa(winctx->rx_fifo); + val = winctx->rx_fifo; val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0); write_hvwc_reg(window, VREG(LFIFO_BAR), val); @@ -739,7 +739,7 @@ static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin, */ winctx->fifo_disable = true; winctx->intr_disable = true; - winctx->rx_fifo = NULL; + winctx->rx_fifo = 0; } winctx->lnotify_lpid = rxattr->lnotify_lpid; diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 8bb08e395de05..08d9d3d5a22b0 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -376,7 +376,7 @@ struct pnv_vas_window { * is a container for the register fields in the window context. 
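Note: the ultravisor.c hunk above, and the dart_iommu.c, fsl_rio.c, icp-opal.c and xive/spapr.c hunks further down, all add missing of_node_put() calls: the of_find_*() helpers return a device node with its reference count raised, and every exit path, error paths included, must drop that reference. A userspace sketch of the same acquire/release shape (the refcounted node type below is invented for illustration):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct node { int refs; char name[16]; };

static struct node *node_get(const char *name)   /* stands in for of_find_*() */
{
    struct node *n = calloc(1, sizeof(*n));

    if (!n)
        return NULL;
    snprintf(n->name, sizeof(n->name), "%s", name);
    n->refs = 1;
    return n;
}

static void node_put(struct node *n)             /* stands in for of_node_put() */
{
    if (n && --n->refs == 0)
        free(n);
}

static int init_from(const char *name)
{
    struct node *n = node_get(name);
    int err = 0;

    if (!n)
        return -1;
    if (strcmp(n->name, "memcons") != 0) {
        err = -1;
        goto out;        /* error path must still drop the reference */
    }
    printf("found %s\n", n->name);
out:
    node_put(n);         /* dropped on success and on failure alike */
    return err;
}

int main(void)
{
    init_from("memcons");
    init_from("bogus");
    return 0;
}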
*/ struct vas_winctx { - void *rx_fifo; + u64 rx_fifo; int rx_fifo_size; int wcreds_max; int rsvd_txbuf_count; diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 39962c9055422..82cae08976bcd 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -125,8 +125,8 @@ struct papr_scm_priv { /* The bits which needs to be overridden */ u64 health_bitmap_inject_mask; - /* array to have event_code and stat_id mappings */ - char **nvdimm_events_map; + /* array to have event_code and stat_id mappings */ + u8 *nvdimm_events_map; }; static int papr_scm_pmem_flush(struct nd_region *nd_region, @@ -370,7 +370,7 @@ static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, stat = &stats->scm_statistic[0]; memcpy(&stat->stat_id, - p->nvdimm_events_map[event->attr.config], + &p->nvdimm_events_map[event->attr.config * sizeof(stat->stat_id)], sizeof(stat->stat_id)); stat->stat_val = 0; @@ -462,14 +462,16 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu { struct papr_scm_perf_stat *stat; struct papr_scm_perf_stats *stats; - int index, rc, count; u32 available_events; + int index, rc = 0; if (!p->stat_buffer_len) return -ENOENT; available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats)) / sizeof(struct papr_scm_perf_stat); + if (available_events == 0) + return -EOPNOTSUPP; /* Allocate the buffer for phyp where stats are written */ stats = kzalloc(p->stat_buffer_len, GFP_KERNEL); @@ -478,35 +480,30 @@ static int papr_scm_pmu_check_events(struct papr_scm_priv *p, struct nvdimm_pmu return rc; } - /* Allocate memory to nvdimm_event_map */ - p->nvdimm_events_map = kcalloc(available_events, sizeof(char *), GFP_KERNEL); - if (!p->nvdimm_events_map) { - rc = -ENOMEM; - goto out_stats; - } - /* Called to get list of events supported */ rc = drc_pmem_query_stats(p, stats, 0); if (rc) - goto out_nvdimm_events_map; - - for (index = 0, stat = stats->scm_statistic, count = 0; - index < available_events; index++, ++stat) { - p->nvdimm_events_map[count] = kmemdup_nul(stat->stat_id, 8, GFP_KERNEL); - if (!p->nvdimm_events_map[count]) { - rc = -ENOMEM; - goto out_nvdimm_events_map; - } + goto out; - count++; + /* + * Allocate memory and populate nvdimm_event_map. + * Allocate an extra element for NULL entry + */ + p->nvdimm_events_map = kcalloc(available_events + 1, + sizeof(stat->stat_id), + GFP_KERNEL); + if (!p->nvdimm_events_map) { + rc = -ENOMEM; + goto out; } - p->nvdimm_events_map[count] = NULL; - kfree(stats); - return 0; -out_nvdimm_events_map: - kfree(p->nvdimm_events_map); -out_stats: + /* Copy all stat_ids to event map */ + for (index = 0, stat = stats->scm_statistic; + index < available_events; index++, ++stat) { + memcpy(&p->nvdimm_events_map[index * sizeof(stat->stat_id)], + &stat->stat_id, sizeof(stat->stat_id)); + } +out: kfree(stats); return rc; } diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index be6b99b1b3523..9a02aed886a0d 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -404,9 +404,10 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } /* Initialize the DART HW */ - if (dart_init(dn) != 0) + if (dart_init(dn) != 0) { + of_node_put(dn); return; - + } /* * U4 supports a DART bypass, we use it for 64-bit capable devices to * improve performance. 
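Note: the papr_scm.c rework just above drops the array of individually kmemdup'd stat-id strings in favour of one flat buffer of fixed-width entries, indexed as event index times sizeof(stat_id); that removes the per-entry allocations and the partial-failure cleanup path. A compact userspace sketch of the flat-map layout (the stat names are made up for the example):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ID_LEN 8   /* fixed-width stat_id, mirroring the hunk above */

int main(void)
{
    const char *ids[] = { "StatAAAA", "StatBBBB", "StatCCCC" };  /* invented */
    size_t i, n = sizeof(ids) / sizeof(ids[0]);
    size_t event = 1;

    /* One flat buffer instead of an array of separately allocated strings;
     * entry i lives at offset i * ID_LEN.  The extra entry at the end stays
     * zeroed, like the terminating entry in the patch. */
    unsigned char *map = calloc(n + 1, ID_LEN);

    if (!map)
        return 1;
    for (i = 0; i < n; i++)
        memcpy(map + i * ID_LEN, ids[i], ID_LEN);

    printf("event %zu -> %.8s\n", event, (char *)(map + event * ID_LEN));
    free(map);
    return 0;
}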
However, that only works for devices connected @@ -419,6 +420,7 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) /* Setup pci_dma ops */ set_pci_dma_ops(&dma_iommu_ops); + of_node_put(dn); } #ifdef CONFIG_PM diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index ff7906b48ca1e..1bfc9afa8a1a1 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -505,8 +505,10 @@ int fsl_rio_setup(struct platform_device *dev) if (rc) { dev_err(&dev->dev, "Can't get %pOF property 'reg'\n", rmu_node); + of_node_put(rmu_node); goto err_rmu; } + of_node_put(rmu_node); rmu_regs_win = ioremap(rmu_regs.start, resource_size(&rmu_regs)); if (!rmu_regs_win) { dev_err(&dev->dev, "Unable to map rmu register window\n"); diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index bda4c32582d97..4dae624b9f2f4 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -196,6 +196,7 @@ int __init icp_opal_init(void) printk("XICS: Using OPAL ICP fallbacks\n"); + of_node_put(np); return 0; } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 29456c255f9f7..503f544d28e29 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -830,12 +830,12 @@ bool __init xive_spapr_init(void) /* Resource 1 is the OS ring TIMA */ if (of_address_to_resource(np, 1, &r)) { pr_err("Failed to get thread mgmnt area resource\n"); - return false; + goto err_put; } tima = ioremap(r.start, resource_size(&r)); if (!tima) { pr_err("Failed to map thread mgmnt area\n"); - return false; + goto err_put; } if (!xive_get_max_prio(&max_prio)) @@ -871,6 +871,7 @@ bool __init xive_spapr_init(void) if (!xive_core_init(np, &xive_spapr_ops, tima, TM_QW1_OS, max_prio)) goto err_mem_free; + of_node_put(np); pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10)); return true; @@ -878,6 +879,8 @@ bool __init xive_spapr_init(void) xive_irq_bitmap_remove_all(); err_unmap: iounmap(tima); +err_put: + of_node_put(np); return false; } diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 7d81102cffd48..c6ca1b9cbf712 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -154,3 +154,7 @@ PHONY += rv64_randconfig rv64_randconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \ -f $(srctree)/Makefile randconfig + +PHONY += rv32_defconfig +rv32_defconfig: + $(Q)$(MAKE) -f $(srctree)/Makefile defconfig 32-bit.config diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 67406c3763890..0377ce0fcc726 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -23,9 +23,9 @@ 888 : \new_c 889 : - .previous .org . - (889b - 888b) + (887b - 886b) .org . - (887b - 886b) + (889b - 888b) + .previous .endif .endm @@ -60,9 +60,9 @@ "888 :\n" \ new_c "\n" \ "889 :\n" \ - ".previous\n" \ ".org . - (887b - 886b) + (889b - 888b)\n" \ ".org . 
- (889b - 888b) + (887b - 886b)\n" \ + ".previous\n" \ ".endif\n" #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 8c2549b16ac06..618d7c5af1a2d 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -67,30 +67,4 @@ #error "Unexpected __SIZEOF_SHORT__" #endif -#ifdef __ASSEMBLY__ - -/* Common assembly source macros */ - -#ifdef CONFIG_XIP_KERNEL -.macro XIP_FIXUP_OFFSET reg - REG_L t0, _xip_fixup - add \reg, \reg, t0 -.endm -.macro XIP_FIXUP_FLASH_OFFSET reg - la t1, __data_loc - REG_L t1, _xip_phys_offset - sub \reg, \reg, t1 - add \reg, \reg, t0 -.endm -_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET -_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET -#else -.macro XIP_FIXUP_OFFSET reg -.endm -.macro XIP_FIXUP_FLASH_OFFSET reg -.endm -#endif /* CONFIG_XIP_KERNEL */ - -#endif /* __ASSEMBLY__ */ - #endif /* _ASM_RISCV_ASM_H */ diff --git a/arch/riscv/include/asm/irq_work.h b/arch/riscv/include/asm/irq_work.h index d6c277992f76a..b53891964ae03 100644 --- a/arch/riscv/include/asm/irq_work.h +++ b/arch/riscv/include/asm/irq_work.h @@ -4,7 +4,7 @@ static inline bool arch_irq_work_has_interrupt(void) { - return true; + return IS_ENABLED(CONFIG_SMP); } extern void arch_irq_work_raise(void); #endif /* _ASM_RISCV_IRQ_WORK_H */ diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 507cae273bc62..d6a7428f6248d 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -41,7 +41,7 @@ static inline u32 get_cycles_hi(void) static inline unsigned long random_get_entropy(void) { if (unlikely(clint_time_val == NULL)) - return 0; + return random_get_entropy_fallback(); return get_cycles(); } #define random_get_entropy() random_get_entropy() diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index 6c316093a1e59..977ee6181dabf 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -9,7 +9,6 @@ */ #define __ARCH_WANT_SYS_CLONE -#define __ARCH_WANT_MEMFD_SECRET #include diff --git a/arch/riscv/include/asm/xip_fixup.h b/arch/riscv/include/asm/xip_fixup.h new file mode 100644 index 0000000000000..d4ffc3c37649f --- /dev/null +++ b/arch/riscv/include/asm/xip_fixup.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * XIP fixup macros, only useful in assembly. 
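Note: the riscv timex.h hunk a little above changes the not-yet-ready case of random_get_entropy() to return random_get_entropy_fallback() instead of 0: a constant contributes nothing to the entropy pool, while a coarser clock still varies. A userspace sketch of that fallback idea, with clock_gettime() standing in for the fallback helper (all names here are illustrative):

#include <stdio.h>
#include <time.h>

static int cycle_counter_ready;   /* pretend the real counter comes up later */

static unsigned long read_cycles(void)
{
    return 123456789UL;           /* placeholder for the real cycle counter */
}

static unsigned long get_entropy(void)
{
    struct timespec ts;

    if (!cycle_counter_ready) {
        /* weaker than the cycle counter, but never a constant */
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long)ts.tv_sec ^ (unsigned long)ts.tv_nsec;
    }
    return read_cycles();
}

int main(void)
{
    printf("early: %lx\n", get_entropy());
    cycle_counter_ready = 1;
    printf("later: %lx\n", get_entropy());
    return 0;
}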
+ */ +#ifndef _ASM_RISCV_XIP_FIXUP_H +#define _ASM_RISCV_XIP_FIXUP_H + +#include + +#ifdef CONFIG_XIP_KERNEL +.macro XIP_FIXUP_OFFSET reg + REG_L t0, _xip_fixup + add \reg, \reg, t0 +.endm +.macro XIP_FIXUP_FLASH_OFFSET reg + la t1, __data_loc + REG_L t1, _xip_phys_offset + sub \reg, \reg, t1 + add \reg, \reg, t0 +.endm + +_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET +#else +.macro XIP_FIXUP_OFFSET reg +.endm +.macro XIP_FIXUP_FLASH_OFFSET reg +.endm +#endif /* CONFIG_XIP_KERNEL */ + +#endif diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 8062996c2dfd0..d95fbf5846b0b 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -21,6 +21,7 @@ #endif /* __LP64__ */ #define __ARCH_WANT_SYS_CLONE3 +#define __ARCH_WANT_MEMFD_SECRET #include diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c index 0241592982314..1aa540350abd3 100644 --- a/arch/riscv/kernel/efi.c +++ b/arch/riscv/kernel/efi.c @@ -65,7 +65,7 @@ static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) if (md->attribute & EFI_MEMORY_RO) { val = pte_val(pte) & ~_PAGE_WRITE; - val = pte_val(pte) | _PAGE_READ; + val |= _PAGE_READ; pte = __pte(val); } if (md->attribute & EFI_MEMORY_XP) { diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 893b8bb693912..b865046e4dbbc 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -14,6 +14,7 @@ #include #include #include +#include #include "efi-header.S" __HEAD @@ -297,6 +298,7 @@ clear_bss_done: REG_S a0, (a2) /* Initialize page tables and relocate to virtual addresses */ + la tp, init_task la sp, init_thread_union + THREAD_SIZE XIP_FIXUP_OFFSET sp #ifdef CONFIG_BUILTIN_DTB diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index cbef0fc73afa8..df8e24559035c 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -65,7 +65,9 @@ machine_kexec_prepare(struct kimage *image) if (image->segment[i].memsz <= sizeof(fdt)) continue; - if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) + if (image->file_mode) + memcpy(&fdt, image->segment[i].buf, sizeof(fdt)); + else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) continue; if (fdt_check_header(&fdt)) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 834eb652a7b9d..e0a00739bd13b 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -189,7 +189,7 @@ static void __init init_resources(void) res = &mem_res[res_idx--]; res->name = "Reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; @@ -214,7 +214,7 @@ static void __init init_resources(void) if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; } else { res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S index 4b07b809a2b8c..aafcca58c19de 100644 --- a/arch/riscv/kernel/suspend_entry.S +++ b/arch/riscv/kernel/suspend_entry.S @@ -8,6 +8,7 @@ #include #include #include +#include .text .altmacro diff --git 
a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 05ed641a1134c..39e2e1d0e94f4 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -677,7 +677,7 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) } #endif /* CONFIG_STRICT_KERNEL_RWX */ -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) static void __init disable_pgtable_l5(void) { pgtable_l5_enabled = false; diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 54c7536f2482d..1023e9d43d443 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -701,7 +701,7 @@ static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, unsigned int nbytes) { gw->walk_bytes_remain -= nbytes; - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); scatterwalk_advance(&gw->walk, nbytes); scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); gw->walk_ptr = NULL; @@ -776,7 +776,7 @@ static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) goto out; } - scatterwalk_unmap(&gw->walk); + scatterwalk_unmap(gw->walk_ptr); gw->walk_ptr = NULL; gw->ptr = gw->buf; diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 1effac6a01520..1c4f585dd39b6 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -369,7 +369,7 @@ void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev); struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages); /* Function from drivers/s390/cio/chsc.c */ -int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); +int chsc_sstpc(void *page, unsigned int op, u16 ctrl, long *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 7f3c9ac34bd8d..63098df81c9f2 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -9,6 +9,8 @@ #ifndef _S390_KEXEC_H #define _S390_KEXEC_H +#include + #include #include #include @@ -83,4 +85,12 @@ struct kimage_arch { extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index d9d5350cc3ec3..bf15da0fedbca 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -46,10 +46,17 @@ static inline bool test_preempt_need_resched(void) static inline void __preempt_count_add(int val) { - if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) - __atomic_add_const(val, &S390_lowcore.preempt_count); - else - __atomic_add(val, &S390_lowcore.preempt_count); + /* + * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES + * enabled, gcc 12 fails to handle __builtin_constant_p(). 
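Note: the riscv efi.c hunk above fixes a classic lost update: the old code derived val from pte twice, so the second assignment discarded the _PAGE_WRITE clear performed by the first. A tiny standalone illustration of why the |= form matters (flag values invented for the demo):

#include <stdio.h>
#include <stdint.h>

#define PAGE_READ  0x1u
#define PAGE_WRITE 0x2u
#define PAGE_EXEC  0x4u

int main(void)
{
    uint64_t pte = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
    uint64_t val;

    /* Old form: the second statement rebuilds val from pte, so the
     * write-bit clear from the first statement is silently lost. */
    val = pte & ~(uint64_t)PAGE_WRITE;
    val = pte | PAGE_READ;
    printf("buggy: %#llx (write bit still set)\n", (unsigned long long)val);

    /* Fixed form: accumulate into val, so both edits survive. */
    val = pte & ~(uint64_t)PAGE_WRITE;
    val |= PAGE_READ;
    printf("fixed: %#llx\n", (unsigned long long)val);
    return 0;
}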
+ */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES)) { + if (__builtin_constant_p(val) && (val >= -128) && (val <= 127)) { + __atomic_add_const(val, &S390_lowcore.preempt_count); + return; + } + } + __atomic_add(val, &S390_lowcore.preempt_count); } static inline void __preempt_count_sub(int val) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 2cfce42aa7fc4..ce878e85b6e4e 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -197,6 +197,7 @@ static inline cycles_t get_cycles(void) { return (cycles_t) get_tod_clock() >> 2; } +#define get_cycles get_cycles int get_phys_clock(unsigned long *clock); void init_cpu_timer(void); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 59b69c8ab5e1f..85e9703e52a8c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -258,6 +258,10 @@ ENTRY(sie64a) BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) +# Let the next instruction be NOP to avoid triggering a machine check +# and handling it in a guest as result of the instruction execution. + nopr 7 +.Lsie_leave: BPOFF BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: @@ -557,7 +561,7 @@ ENTRY(mcck_int_handler) jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) OUTSIDE %r9,.Lsie_gmap,.Lsie_done,6f - OUTSIDE %r9,.Lsie_entry,.Lsie_skip,4f + OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST j 5f 4: CHKSTG .Lmcck_panic diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index ea7729bebaa07..a7f8db73984b0 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -30,7 +30,7 @@ static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) if (!stack) return NULL; - return (struct kvm_s390_sie_block *) stack->empty1[0]; + return (struct kvm_s390_sie_block *)stack->empty1[1]; } static bool is_in_guest(struct pt_regs *regs) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 799147658dee2..1cd523748bd2e 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm sys_pmadv_ksm diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 326cb8f75f58e..f0a1484ee00b0 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -364,7 +364,7 @@ static inline int check_sync_clock(void) * Apply clock delta to the global data structures. * This is called once on the CPU that performed the clock sync. */ -static void clock_sync_global(unsigned long delta) +static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; @@ -400,7 +400,7 @@ static void clock_sync_global(unsigned long delta) * Apply clock delta to the per-CPU data structures of this CPU. * This is called for each online CPU after the call to clock_sync_global. */ -static void clock_sync_local(unsigned long delta) +static void clock_sync_local(long delta) { /* Add the delta to the clock comparator. 
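Note: the s390 time.c and cio.h hunks around here change the clock-sync delta from an unsigned 64-bit quantity to a signed long, presumably because a sync can step the TOD clock backwards; stored unsigned, a small negative correction reads back as an enormous positive one. A two-line demonstration:

#include <stdio.h>

int main(void)
{
    long          s_delta = -5;                  /* clock stepped back by 5 */
    unsigned long u_delta = (unsigned long)-5;   /* same bits, unsigned type */

    printf("signed view:   %ld\n", s_delta);
    printf("unsigned view: %lu\n", u_delta);     /* huge bogus forward step */
    return 0;
}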
*/ if (S390_lowcore.clock_comparator != clock_comparator_max) { @@ -424,7 +424,7 @@ static void __init time_init_wq(void) struct clock_sync_data { atomic_t cpus; int in_sync; - unsigned long clock_delta; + long clock_delta; }; /* @@ -544,7 +544,7 @@ static int stpinfo_valid(void) static int stp_sync_clock(void *data) { struct clock_sync_data *sync = data; - u64 clock_delta, flags; + long clock_delta, flags; static int first; int rc; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1ac73917a8d39..b8ae4a4aa2ba4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2608,6 +2608,18 @@ static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr, return 0; } +/* + * Give a chance to schedule after setting a key to 256 pages. + * We only hold the mm lock, which is a rwsem and the kvm srcu. + * Both can sleep. + */ +static int __s390_enable_skey_pmd(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + cond_resched(); + return 0; +} + static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, unsigned long hmask, unsigned long next, struct mm_walk *walk) @@ -2630,12 +2642,14 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr, end = start + HPAGE_SIZE - 1; __storage_key_init_range(start, end); set_bit(PG_arch_1, &page->flags); + cond_resched(); return 0; } static const struct mm_walk_ops enable_skey_walk_ops = { .hugetlb_entry = __s390_enable_skey_hugetlb, .pte_entry = __s390_enable_skey_pte, + .pmd_entry = __s390_enable_skey_pmd, }; int s390_enable_skey(void) diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 2de85c977f54f..cfc75fa43eae4 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h index 542915b462097..f86326a6f89e0 100644 --- a/arch/sparc/include/asm/timex_32.h +++ b/arch/sparc/include/asm/timex_32.h @@ -9,8 +9,6 @@ #define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ -/* XXX Maybe do something better at some point... 
-DaveM */ -typedef unsigned long cycles_t; -#define get_cycles() (0) +#include #endif diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index f9fe502b81c65..dad38960d1a8a 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -779,5 +779,6 @@ static_assert(offsetof(compat_siginfo_t, si_upper) == 0x18); static_assert(offsetof(compat_siginfo_t, si_pkey) == 0x14); static_assert(offsetof(compat_siginfo_t, si_perf_data) == 0x10); static_assert(offsetof(compat_siginfo_t, si_perf_type) == 0x14); +static_assert(offsetof(compat_siginfo_t, si_perf_flags) == 0x18); static_assert(offsetof(compat_siginfo_t, si_band) == 0x0c); static_assert(offsetof(compat_siginfo_t, si_fd) == 0x10); diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 8b9fc76cd3e02..570e43e6fda5c 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -590,5 +590,6 @@ static_assert(offsetof(siginfo_t, si_upper) == 0x28); static_assert(offsetof(siginfo_t, si_pkey) == 0x20); static_assert(offsetof(siginfo_t, si_perf_data) == 0x18); static_assert(offsetof(siginfo_t, si_perf_type) == 0x20); +static_assert(offsetof(siginfo_t, si_perf_flags) == 0x24); static_assert(offsetof(siginfo_t, si_band) == 0x10); static_assert(offsetof(siginfo_t, si_fd) == 0x14); diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4398cc6fb68dd..d2c0a6426f6b8 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -496,3 +496,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 62997055c4547..26a702a065154 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -133,7 +133,7 @@ static void line_timer_cb(struct work_struct *work) struct line *line = container_of(work, struct line, task.work); if (!line->throttled) - chan_interrupt(line, line->driver->read_irq); + chan_interrupt(line, line->read_irq); } int enable_chan(struct line *line) @@ -195,9 +195,9 @@ void free_irqs(void) chan = list_entry(ele, struct chan, free_list); if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); + um_free_irq(chan->line->read_irq, chan); if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); + um_free_irq(chan->line->write_irq, chan); chan->enabled = 0; } } @@ -215,9 +215,9 @@ static void close_one_chan(struct chan *chan, int delay_free_irq) spin_unlock_irqrestore(&irqs_to_free_lock, flags); } else { if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); + um_free_irq(chan->line->read_irq, chan); if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); + um_free_irq(chan->line->write_irq, chan); chan->enabled = 0; } if (chan->ops->close != NULL) diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c index 6040817c036f3..25727ed648b72 100644 --- a/arch/um/drivers/chan_user.c +++ b/arch/um/drivers/chan_user.c @@ -220,7 +220,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, unsigned long *stack_out) { struct winch_data data; - int fds[2], n, err; + int fds[2], n, err, pid; char c; err = os_pipe(fds, 1, 1); @@ -238,8 +238,9 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, * problem with 
/dev/net/tun, which if held open by this * thread, prevents the TUN/TAP device from being reused. */ - err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); - if (err < 0) { + pid = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out); + if (pid < 0) { + err = pid; printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n", -err); goto out_close; @@ -263,7 +264,7 @@ static int winch_tramp(int fd, struct tty_port *port, int *fd_out, goto out_close; } - return err; + return pid; out_close: close(fds[1]); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 8febf95da96e1..02b0befd67632 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -139,7 +139,7 @@ static int flush_buffer(struct line *line) count = line->buffer + LINE_BUFSIZE - line->head; n = write_chan(line->chan_out, line->head, count, - line->driver->write_irq); + line->write_irq); if (n < 0) return n; if (n == count) { @@ -156,7 +156,7 @@ static int flush_buffer(struct line *line) count = line->tail - line->head; n = write_chan(line->chan_out, line->head, count, - line->driver->write_irq); + line->write_irq); if (n < 0) return n; @@ -195,7 +195,7 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len) ret = buffer_data(line, buf, len); else { n = write_chan(line->chan_out, buf, len, - line->driver->write_irq); + line->write_irq); if (n < 0) { ret = n; goto out_up; @@ -215,7 +215,7 @@ void line_throttle(struct tty_struct *tty) { struct line *line = tty->driver_data; - deactivate_chan(line->chan_in, line->driver->read_irq); + deactivate_chan(line->chan_in, line->read_irq); line->throttled = 1; } @@ -224,7 +224,7 @@ void line_unthrottle(struct tty_struct *tty) struct line *line = tty->driver_data; line->throttled = 0; - chan_interrupt(line, line->driver->read_irq); + chan_interrupt(line, line->read_irq); } static irqreturn_t line_write_interrupt(int irq, void *data) @@ -260,19 +260,23 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data) int err; if (input) { - err = um_request_irq(driver->read_irq, fd, IRQ_READ, - line_interrupt, IRQF_SHARED, + err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ, + line_interrupt, 0, driver->read_irq_name, data); if (err < 0) return err; + + line->read_irq = err; } if (output) { - err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, - line_write_interrupt, IRQF_SHARED, + err = um_request_irq(UM_IRQ_ALLOC, fd, IRQ_WRITE, + line_write_interrupt, 0, driver->write_irq_name, data); if (err < 0) return err; + + line->write_irq = err; } return 0; diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h index bdb16b96e76fd..f15be75a3bf3b 100644 --- a/arch/um/drivers/line.h +++ b/arch/um/drivers/line.h @@ -23,9 +23,7 @@ struct line_driver { const short minor_start; const short type; const short subtype; - const int read_irq; const char *read_irq_name; - const int write_irq; const char *write_irq_name; struct mc_device mc; struct tty_driver *driver; @@ -35,6 +33,8 @@ struct line { struct tty_port port; int valid; + int read_irq, write_irq; + char *init_str; struct list_head chan_list; struct chan *chan_in, *chan_out; diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c index 41eae2e8fb652..8514966778d53 100644 --- a/arch/um/drivers/ssl.c +++ b/arch/um/drivers/ssl.c @@ -47,9 +47,7 @@ static struct line_driver driver = { .minor_start = 64, .type = TTY_DRIVER_TYPE_SERIAL, .subtype = 0, - .read_irq = SSL_IRQ, .read_irq_name = "ssl", - .write_irq = SSL_WRITE_IRQ, .write_irq_name = "ssl-write", .mc 
= { .list = LIST_HEAD_INIT(driver.mc.list), diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c index e8b762f4d8c25..489d5a746ed33 100644 --- a/arch/um/drivers/stdio_console.c +++ b/arch/um/drivers/stdio_console.c @@ -53,9 +53,7 @@ static struct line_driver driver = { .minor_start = 0, .type = TTY_DRIVER_TYPE_CONSOLE, .subtype = SYSTEM_TYPE_CONSOLE, - .read_irq = CONSOLE_IRQ, .read_irq_name = "console", - .write_irq = CONSOLE_WRITE_IRQ, .write_irq_name = "console-write", .mc = { .list = LIST_HEAD_INIT(driver.mc.list), diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index ba562d68dc048..82ff3785bf69f 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -63,6 +63,7 @@ struct virtio_uml_device { u8 config_changed_irq:1; uint64_t vq_irq_vq_map; + int recv_rc; }; struct virtio_uml_vq_info { @@ -148,14 +149,6 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev, rc = vhost_user_recv_header(fd, msg); - if (rc == -ECONNRESET && vu_dev->registered) { - struct virtio_uml_platform_data *pdata; - - pdata = vu_dev->pdata; - - virtio_break_device(&vu_dev->vdev); - schedule_work(&pdata->conn_broken_wk); - } if (rc) return rc; size = msg->header.size; @@ -164,6 +157,21 @@ static int vhost_user_recv(struct virtio_uml_device *vu_dev, return full_read(fd, &msg->payload, size, false); } +static void vhost_user_check_reset(struct virtio_uml_device *vu_dev, + int rc) +{ + struct virtio_uml_platform_data *pdata = vu_dev->pdata; + + if (rc != -ECONNRESET) + return; + + if (!vu_dev->registered) + return; + + virtio_break_device(&vu_dev->vdev); + schedule_work(&pdata->conn_broken_wk); +} + static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) @@ -171,8 +179,10 @@ static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size, true); - if (rc) + if (rc) { + vhost_user_check_reset(vu_dev, rc); return rc; + } if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) return -EPROTO; @@ -369,6 +379,7 @@ static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, sizeof(msg.msg.payload) + sizeof(msg.extra_payload)); + vu_dev->recv_rc = rc; if (rc) return IRQ_NONE; @@ -412,7 +423,9 @@ static irqreturn_t vu_req_interrupt(int irq, void *data) if (!um_irq_timetravel_handler_used()) ret = vu_req_read_message(vu_dev, NULL); - if (vu_dev->vq_irq_vq_map) { + if (vu_dev->recv_rc) { + vhost_user_check_reset(vu_dev, vu_dev->recv_rc); + } else if (vu_dev->vq_irq_vq_map) { struct virtqueue *vq; virtio_device_for_each_vq((&vu_dev->vdev), vq) { diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index f1f3f52f1e9cc..b2d834a29f3a9 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += bug.h generic-y += compat.h generic-y += current.h generic-y += device.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h index e187c789369d3..749dfe8512e84 100644 --- a/arch/um/include/asm/irq.h +++ b/arch/um/include/asm/irq.h @@ -4,19 +4,15 @@ #define TIMER_IRQ 0 #define UMN_IRQ 1 -#define CONSOLE_IRQ 2 -#define CONSOLE_WRITE_IRQ 3 -#define UBD_IRQ 4 -#define UM_ETH_IRQ 5 -#define SSL_IRQ 6 -#define SSL_WRITE_IRQ 7 -#define ACCEPT_IRQ 8 -#define MCONSOLE_IRQ 9 -#define WINCH_IRQ 10 -#define SIGIO_WRITE_IRQ 11 -#define 
TELNETD_IRQ 12 -#define XTERM_IRQ 13 -#define RANDOM_IRQ 14 +#define UBD_IRQ 2 +#define UM_ETH_IRQ 3 +#define ACCEPT_IRQ 4 +#define MCONSOLE_IRQ 5 +#define WINCH_IRQ 6 +#define SIGIO_WRITE_IRQ 7 +#define TELNETD_IRQ 8 +#define XTERM_IRQ 9 +#define RANDOM_IRQ 10 #ifdef CONFIG_UML_NET_VECTOR diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 1395cbd7e340d..c7b4b49826a2a 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -60,6 +60,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 7 #define TIF_NOTIFY_RESUME 8 #define TIF_SECCOMP 9 /* secure computing */ +#define TIF_SINGLESTEP 10 /* single stepping userspace */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -68,5 +69,6 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #endif diff --git a/arch/um/include/asm/timex.h b/arch/um/include/asm/timex.h index e392a9a5bc9bd..9f27176adb26d 100644 --- a/arch/um/include/asm/timex.h +++ b/arch/um/include/asm/timex.h @@ -2,13 +2,8 @@ #ifndef __UM_TIMEX_H #define __UM_TIMEX_H -typedef unsigned long cycles_t; - -static inline cycles_t get_cycles (void) -{ - return 0; -} - #define CLOCK_TICK_RATE (HZ) +#include + #endif diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index c85e40c72779f..58938d75871af 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -43,7 +43,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) { PT_REGS_IP(regs) = eip; PT_REGS_SP(regs) = esp; - current->ptrace &= ~PT_DTRACE; + clear_thread_flag(TIF_SINGLESTEP); #ifdef SUBARCH_EXECVE1 SUBARCH_EXECVE1(regs->regs); #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 80504680be084..88c5c78442813 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -335,7 +335,7 @@ int singlestepping(void * t) { struct task_struct *task = t ? 
t : current; - if (!(task->ptrace & PT_DTRACE)) + if (!test_thread_flag(TIF_SINGLESTEP)) return 0; if (task->thread.singlestep_syscall) diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index bfaf6ab1ac037..5154b27de580f 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -11,7 +11,7 @@ void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_DTRACE; + set_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -21,7 +21,7 @@ void user_enable_single_step(struct task_struct *child) void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_DTRACE; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); child->thread.singlestep_syscall = 0; #ifdef SUBARCH_SET_SINGLESTEPPING @@ -120,7 +120,7 @@ static void send_sigtrap(struct uml_pt_regs *regs, int error_code) } /* - * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and + * XXX Check TIF_SINGLESTEP for singlestepping check and * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check */ int syscall_trace_enter(struct pt_regs *regs) @@ -144,7 +144,7 @@ void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(regs); /* Fake a debug trap */ - if (ptraced & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 88cd9b5c1b744..ae4658f576ab7 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -53,7 +53,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) unsigned long sp; int err; - if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED)) + if (test_thread_flag(TIF_SINGLESTEP) && (current->ptrace & PT_PTRACED)) singlestep = 1; /* Did we come from a system call? */ @@ -128,7 +128,7 @@ void do_signal(struct pt_regs *regs) * on the host. The tracing thread will check this flag and * PTRACE_SYSCALL if necessary. */ - if (current->ptrace & PT_DTRACE) + if (test_thread_flag(TIF_SINGLESTEP)) current->thread.singlestep_syscall = is_syscall(PT_REGS_IP(¤t->thread.regs)); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4bed3abf444d1..306ab7337d728 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -86,6 +86,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2 select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_COPY_MC if X86_64 select ARCH_HAS_SET_MEMORY @@ -1313,7 +1314,7 @@ config MICROCODE config MICROCODE_INTEL bool "Intel microcode loading support" - depends on MICROCODE + depends on CPU_SUP_INTEL && MICROCODE default MICROCODE help This options enables microcode patch loading support for Intel @@ -1325,7 +1326,7 @@ config MICROCODE_INTEL config MICROCODE_AMD bool "AMD microcode loading support" - depends on MICROCODE + depends on CPU_SUP_AMD && MICROCODE help If you select this option, microcode patch loading support for AMD processors will be enabled. diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 542377cd419d7..22b919cdb6d19 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -157,7 +157,7 @@ config MPENTIUM4 config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 help Select this for an AMD K6-family processor. Enables use of @@ -165,7 +165,7 @@ config MK6 flags to GCC. 
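Note: from here on, the Kconfig.cpu additions (and the matching cflags-$(CONFIG_...) += -march=... lines in the arch/x86/Makefile hunk near the end of this patch) each map a processor choice to a specific -march= value, so the whole kernel is built for that microarchitecture. One quick way to see what a given -march selection turns on is to inspect the compiler's predefined feature macros; the probe below checks only a few well-known ISA macros and is illustrative, not exhaustive:

#include <stdio.h>

int main(void)
{
#ifdef __SSE3__
    puts("SSE3 code generation enabled");
#endif
#ifdef __AVX2__
    puts("AVX2 code generation enabled");
#endif
#ifdef __AVX512F__
    puts("AVX-512F code generation enabled");
#endif
    puts("(reflects the -march used to compile this probe)");
    return 0;
}

Building it with, say, gcc -O2 -march=znver3 probe.c (a new enough GCC assumed) prints which of these extensions that selection enables.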
config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 help Select this for an AMD Athlon K7-family processor. Enables use of @@ -173,12 +173,98 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" help Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + help + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + help + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + help + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + help + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + help + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + help + Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + help + Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + help + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + help + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + help + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 + bool "AMD Zen 2" + help + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + +config MZEN3 + bool "AMD Zen 3" + depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + help + Select this for AMD Family 19h Zen 3 processors. + + Enables -march=znver3 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -270,7 +356,7 @@ config MPSC in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" help Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -278,6 +364,8 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + Enables -march=core2 + config MATOM bool "Intel Atom" help @@ -287,6 +375,182 @@ config MATOM accordingly optimized code. Use a recent GCC with specific Atom support in order to fully benefit from selecting this option. +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP + help + + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + help + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + help + + Select this for the Intel Silvermont platform. 
+ + Enables -march=silvermont + +config MGOLDMONT + bool "Intel Goldmont" + select X86_P6_NOP + help + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. + + Enables -march=goldmont + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" + select X86_P6_NOP + help + + Select this for the Intel Goldmont Plus platform including Gemini Lake. + + Enables -march=goldmont-plus + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + help + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + help + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + help + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + help + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake X family. + + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + help + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + help + + Select this for 10th Gen Core processors in the Ice Lake family. + + Enables -march=icelake-client + +config MCASCADELAKE + bool "Intel Cascade Lake" + select X86_P6_NOP + help + + Select this for Xeon processors in the Cascade Lake family. + + Enables -march=cascadelake + +config MCOOPERLAKE + bool "Intel Cooper Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for Xeon processors in the Cooper Lake family. + + Enables -march=cooperlake + +config MTIGERLAKE + bool "Intel Tiger Lake" + depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Tiger Lake family. + + Enables -march=tigerlake + +config MSAPPHIRERAPIDS + bool "Intel Sapphire Rapids" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Sapphire Rapids family. + + Enables -march=sapphirerapids + +config MROCKETLAKE + bool "Intel Rocket Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for eleventh-generation processors in the Rocket Lake family. + + Enables -march=rocketlake + +config MALDERLAKE + bool "Intel Alder Lake" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + select X86_P6_NOP + help + + Select this for twelfth-generation processors in the Alder Lake family. + + Enables -march=alderlake + config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -294,6 +558,50 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. 
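Note: the GENERIC_CPU2/3/4 options added just below target the psABI x86-64 micro-architecture feature levels rather than one specific CPU: v2 is roughly the SSE4.2/POPCNT class, v3 the AVX2/BMI2/FMA class, v4 the AVX-512 class. A short runtime probe using GCC's __builtin_cpu_supports() to check whether the host could run x86-64-v3 code; the feature list is a representative subset, not the full level definition:

#include <stdio.h>

int main(void)
{
    __builtin_cpu_init();

    int v3ish = __builtin_cpu_supports("avx2") &&
                __builtin_cpu_supports("bmi2") &&
                __builtin_cpu_supports("fma");

    printf("this machine %s run x86-64-v3 code\n",
           v3ish ? "can probably" : "cannot");
    return 0;
}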
+config GENERIC_CPU2 + bool "Generic-x86-64-v2" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs with min support of x86-64-v2. + +config GENERIC_CPU3 + bool "Generic-x86-64-v3" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64-v3 CPU with v3 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v3. + +config GENERIC_CPU4 + bool "Generic-x86-64-v4" + depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) + depends on X86_64 + help + Generic x86-64 CPU with v4 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v4. + +config MNATIVE_INTEL + bool "Intel-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for AMD CPUs. Intel Only! + + Enables -march=native + +config MNATIVE_AMD + bool "AMD-Native optimizations autodetected by the compiler" + help + + Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for Intel CPUs. AMD Only! + + Enables -march=native + endchoice config X86_GENERIC @@ -318,7 +626,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 || GENERIC_CPU3 || GENERIC_CPU4 default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,11 +644,11 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || 
MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # # P6_NOPs are a relatively minor optimization that require a family >= @@ -356,26 +664,26 @@ config X86_USE_PPRO_CHECKSUM config X86_P6_NOP def_bool y depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL) config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # this should be set for all -march=.. options where the compiler # generates cmov. 
config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 63d50f65b8283..8cd235d386342 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -143,8 +143,44 @@ else # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += -march=k8 cflags-$(CONFIG_MPSC) += -march=nocona - cflags-$(CONFIG_MCORE2) += -march=core2 - cflags-$(CONFIG_MATOM) += -march=atom + cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3 + cflags-$(CONFIG_MK10) += -march=amdfam10 + cflags-$(CONFIG_MBARCELONA) += -march=barcelona + cflags-$(CONFIG_MBOBCAT) += -march=btver1 + cflags-$(CONFIG_MJAGUAR) += -march=btver2 + cflags-$(CONFIG_MBULLDOZER) += -march=bdver1 + cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm + cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm + cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm + cflags-$(CONFIG_MZEN) += -march=znver1 + cflags-$(CONFIG_MZEN2) += -march=znver2 + cflags-$(CONFIG_MZEN3) += -march=znver3 + cflags-$(CONFIG_MNATIVE_INTEL) += -march=native + cflags-$(CONFIG_MNATIVE_AMD) += -march=native + cflags-$(CONFIG_MATOM) += -march=bonnell + cflags-$(CONFIG_MCORE2) += -march=core2 + cflags-$(CONFIG_MNEHALEM) += -march=nehalem + cflags-$(CONFIG_MWESTMERE) += -march=westmere + cflags-$(CONFIG_MSILVERMONT) += -march=silvermont + cflags-$(CONFIG_MGOLDMONT) += -march=goldmont + cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus + cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge + cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge + cflags-$(CONFIG_MHASWELL) += -march=haswell + cflags-$(CONFIG_MBROADWELL) += -march=broadwell + cflags-$(CONFIG_MSKYLAKE) += -march=skylake + cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 + cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake + cflags-$(CONFIG_MICELAKE) += -march=icelake-client + cflags-$(CONFIG_MCASCADELAKE) += 
-march=cascadelake + cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake + cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake + cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids + cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake + cflags-$(CONFIG_MALDERLAKE) += -march=alderlake + cflags-$(CONFIG_GENERIC_CPU2) += -march=x86-64-v2 + cflags-$(CONFIG_GENERIC_CPU3) += -march=x86-64-v3 + cflags-$(CONFIG_GENERIC_CPU4) += -march=x86-64-v4 cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 73d958522b6a4..d8376e5fe1afe 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -508,6 +508,7 @@ SYM_CODE_START(\asmsym) call vc_switch_off_ist movq %rax, %rsp /* Switch to new stack */ + ENCODE_FRAME_POINTER UNWIND_HINT_REGS /* Update pt_regs */ diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 320480a8db4f8..331aaf1a782ff 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -455,3 +455,4 @@ 448 i386 process_mrelease sys_process_mrelease 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node +451 i386 pmadv_ksm sys_pmadv_ksm diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index c84d12608cd2d..14902db4c01fc 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -372,6 +372,7 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 235a5794296ac..1000d457c3321 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -438,7 +438,7 @@ bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs) static __init int vdso_setup(char *s) { vdso64_enabled = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("vdso=", vdso_setup); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 9739019d4b67a..2704ec1e42a30 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -304,6 +304,16 @@ static int perf_ibs_init(struct perf_event *event) hwc->config_base = perf_ibs->msr; hwc->config = config; + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. Setting _EARLY flag + * makes sure we unwind call-stack before perf sample rip is set to + * IbsOpRip. + */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY; + return 0; } @@ -687,6 +697,14 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) data.raw = &raw; } + /* + * rip recorded by IbsOpRip will not be consistent with rsp and rbp + * recorded as part of interrupt regs. Thus we need to use rip from + * interrupt regs while unwinding call stack. 
+ */ + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + data.callchain = perf_callchain(event, iregs); + throttle = perf_event_overflow(event, &data, ®s); out: if (throttle) { @@ -759,9 +777,10 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) return ret; } -static __init void perf_event_ibs_init(void) +static __init int perf_event_ibs_init(void) { struct attribute **attr = ibs_op_format_attrs; + int ret; /* * Some chips fail to reset the fetch count when it is written; instead @@ -773,7 +792,9 @@ static __init void perf_event_ibs_init(void) if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; - perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + ret = perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); + if (ret) + return ret; if (ibs_caps & IBS_CAPS_OPCNT) { perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; @@ -786,15 +807,35 @@ static __init void perf_event_ibs_init(void) perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK; } - perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + ret = perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + if (ret) + goto err_op; + + ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); + if (ret) + goto err_nmi; - register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); + return 0; + +err_nmi: + perf_pmu_unregister(&perf_ibs_op.pmu); + free_percpu(perf_ibs_op.pcpu); + perf_ibs_op.pcpu = NULL; +err_op: + perf_pmu_unregister(&perf_ibs_fetch.pmu); + free_percpu(perf_ibs_fetch.pcpu); + perf_ibs_fetch.pcpu = NULL; + + return ret; } #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ -static __init void perf_event_ibs_init(void) { } +static __init int perf_event_ibs_init(void) +{ + return 0; +} #endif @@ -1064,9 +1105,7 @@ static __init int amd_ibs_init(void) x86_pmu_amd_ibs_starting_cpu, x86_pmu_amd_ibs_dying_cpu); - perf_event_ibs_init(); - - return 0; + return perf_event_ibs_init(); } /* Since we need the pci subsystem to init ibs we can't do this earlier: */ diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fc7f458eb3de6..c6e7d358fb8d8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -276,7 +276,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf), INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */ - INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x56, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf), INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.CYCLES_MEM_ANY */ diff --git a/arch/x86/include/asm/acenv.h b/arch/x86/include/asm/acenv.h index 9aff97f0de7fd..d937c55e717e6 100644 --- a/arch/x86/include/asm/acenv.h +++ b/arch/x86/include/asm/acenv.h @@ -13,7 +13,19 @@ /* Asm macros */ -#define ACPI_FLUSH_CPU_CACHE() wbinvd() +/* + * ACPI_FLUSH_CPU_CACHE() flushes caches on entering sleep states. + * It is required to prevent data loss. + * + * While running inside virtual machine, the kernel can bypass cache flushing. + * Changing sleep state in a virtual machine doesn't affect the host system + * sleep state and cannot lead to data loss. 
+ */ +#define ACPI_FLUSH_CPU_CACHE() \ +do { \ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) \ + wbinvd(); \ +} while (0) int __acpi_acquire_global_lock(unsigned int *lock); int __acpi_release_global_lock(unsigned int *lock); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1261842d006c7..49a3b122279e5 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -51,7 +51,7 @@ extern const char * const x86_power_flags[32]; extern const char * const x86_bug_flags[NBUGINTS*32]; #define test_cpu_cap(c, bit) \ - test_bit(bit, (unsigned long *)((c)->x86_capability)) + arch_test_bit(bit, (unsigned long *)((c)->x86_capability)) /* * There are 32 bits/features in each mask word. The high bits diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 73e643ae94b6f..e17de69faa543 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -443,5 +443,6 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 11b7c06e2828c..6ad8d946cd3eb 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -186,6 +186,14 @@ extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages); #define arch_kexec_pre_free_pages arch_kexec_pre_free_pages +#ifdef CONFIG_KEXEC_FILE +struct purgatory_info; +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab); +#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add +#endif #endif typedef void crash_vmclear_fn(void); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ee15311b6be1d..4425d6773183b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -114,6 +114,30 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. 
+ */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -131,6 +155,7 @@ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index acbaeaf83b61a..da251a5645b0e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -269,6 +269,8 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include /** diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 62ab07e24aef9..9cb3cf4cf6dde 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -820,7 +820,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -1424,10 +1425,10 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#define arch_faults_on_old_pte arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) { - return false; + return true; } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index 78ca535124864..b45c4d27fd46e 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -86,56 +86,4 @@ bool kernel_page_present(struct page *page); extern int kernel_set_to_readonly; -#ifdef CONFIG_X86_64 -/* - * Prevent speculative access to the page by either unmapping - * it (if we do not require access to any part of the page) or - * marking it uncacheable (if we want to try to retrieve data - * from non-poisoned lines in the page). - */ -static inline int set_mce_nospec(unsigned long pfn, bool unmap) -{ - unsigned long decoy_addr; - int rc; - - /* SGX pages are not in the 1:1 map */ - if (arch_is_platform_page(pfn << PAGE_SHIFT)) - return 0; - /* - * We would like to just call: - * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); - * but doing that would radically increase the odds of a - * speculative access to the poison page because we'd have - * the virtual address of the kernel 1:1 mapping sitting - * around in registers. - * Instead we get tricky. We create a non-canonical address - * that looks just like the one we want, but has bit 63 flipped. - * This relies on set_memory_XX() properly sanitizing any __pa() - * results with __PHYSICAL_MASK or PTE_PFN_MASK. - */ - decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); - - if (unmap) - rc = set_memory_np(decoy_addr, 1); - else - rc = set_memory_uc(decoy_addr, 1); - if (rc) - pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); - return rc; -} -#define set_mce_nospec set_mce_nospec - -/* Restore full speculative operation to the pfn. 
*/ -static inline int clear_mce_nospec(unsigned long pfn) -{ - return set_memory_wb((unsigned long) pfn_to_kaddr(pfn), 1); -} -#define clear_mce_nospec clear_mce_nospec -#else -/* - * Few people would run a 32-bit kernel on a machine that supports - * recoverable errors because they have too much memory to boot 32-bit. - */ -#endif - #endif /* _ASM_X86_SET_MEMORY_H */ diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 7b132d0312ebf..a800abb1a9925 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -19,7 +19,6 @@ struct saved_context { u16 gs; unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; struct desc_ptr gdt_desc; struct desc_ptr idt; @@ -28,6 +27,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); /* routines for saving/restoring kernel state */ diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index 35bb35d28733e..54df06687d834 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -14,9 +14,13 @@ * Image of the saved processor state, used by the low level ACPI suspend to * RAM code and by the low level hibernation code. * - * If you modify it, fix arch/x86/kernel/acpi/wakeup_64.S and make sure that - * __save/__restore_processor_state(), defined in arch/x86/kernel/suspend_64.c, - * still work as required. + * If you modify it, check how it is used in arch/x86/kernel/acpi/wakeup_64.S + * and make sure that __save/__restore_processor_state(), defined in + * arch/x86/power/cpu.c, still work as required. + * + * Because the structure is packed, make sure to avoid unaligned members. For + * optimisation purposes but also because tools like kmemleak only search for + * pointers that are aligned. 
*/ struct saved_context { struct pt_regs regs; @@ -36,7 +40,6 @@ struct saved_context { unsigned long cr0, cr2, cr3, cr4; u64 misc_enable; - bool misc_enable_saved; struct saved_msrs saved_msrs; unsigned long efer; u16 gdt_pad; /* Unused */ @@ -48,6 +51,7 @@ struct saved_context { unsigned long tr; unsigned long safety; unsigned long return_address; + bool misc_enable_saved; } __attribute__((packed)); #define loaddebug(thread,register) \ diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index a4a8b1b16c0c1..956e4145311b1 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h @@ -5,6 +5,15 @@ #include #include +static inline unsigned long random_get_entropy(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) + return random_get_entropy_fallback(); + return rdtsc(); +} +#define random_get_entropy random_get_entropy + /* Assume we use the PIT time source for the clock tick */ #define CLOCK_TICK_RATE PIT_TICK_RATE diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 01a300a9700b9..fbdc3d9514943 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -20,13 +20,12 @@ extern void disable_TSC(void); static inline cycles_t get_cycles(void) { -#ifndef CONFIG_X86_TSC - if (!boot_cpu_has(X86_FEATURE_TSC)) + if (!IS_ENABLED(CONFIG_X86_TSC) && + !cpu_feature_enabled(X86_FEATURE_TSC)) return 0; -#endif - return rdtsc(); } +#define get_cycles get_cycles extern struct system_counterval_t convert_art_to_tsc(u64 art); extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index f78e2b3501a19..913e593a3b45f 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -382,6 +382,103 @@ do { \ #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm_volatile_goto("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + _ASM_EXTABLE_UA(1b, %l[label]) \ + : CC_OUT(z) (success), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory" \ + : label); \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) +#endif // CONFIG_X86_32 +#else // !CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT +#define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ + int __err = 0; \ + bool success; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\ + CC_SET(z) \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, \ + %[errout]) \ + : CC_OUT(z) (success), \ + 
[errout] "+r" (__err), \ + [ptr] "+m" (*_ptr), \ + [old] "+a" (__old) \ + : [new] ltype (__new) \ + : "memory"); \ + if (unlikely(__err)) \ + goto label; \ + if (unlikely(!success)) \ + *_old = __old; \ + likely(success); }) + +#ifdef CONFIG_X86_32 +/* + * Unlike the normal CMPXCHG, hardcode ECX for both success/fail and error. + * There are only six GPRs available and four (EAX, EBX, ECX, and EDX) are + * hardcoded by CMPXCHG8B, leaving only ESI and EDI. If the compiler uses + * both ESI and EDI for the memory operand, compilation will fail if the error + * is an input+output as there will be no register available for input. + */ +#define __try_cmpxchg64_user_asm(_ptr, _pold, _new, label) ({ \ + int __result; \ + __typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \ + __typeof__(*(_ptr)) __old = *_old; \ + __typeof__(*(_ptr)) __new = (_new); \ + asm volatile("\n" \ + "1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \ + "mov $0, %%ecx\n\t" \ + "setz %%cl\n" \ + "2:\n" \ + _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %%ecx) \ + : [result]"=c" (__result), \ + "+A" (__old), \ + [ptr] "+m" (*_ptr) \ + : "b" ((u32)__new), \ + "c" ((u32)((u64)__new >> 32)) \ + : "memory", "cc"); \ + if (unlikely(__result < 0)) \ + goto label; \ + if (unlikely(!__result)) \ + *_old = __old; \ + likely(__result); }) +#endif // CONFIG_X86_32 +#endif // CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT + /* FIXME: this hack is definitely wrong -AK */ struct __large_struct { unsigned long buf[100]; }; #define __m(x) (*(struct __large_struct __user *)(x)) @@ -474,6 +571,51 @@ do { \ } while (0) #endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT +extern void __try_cmpxchg_user_wrong_size(void); + +#ifndef CONFIG_X86_32 +#define __try_cmpxchg64_user_asm(_ptr, _oldp, _nval, _label) \ + __try_cmpxchg_user_asm("q", "r", (_ptr), (_oldp), (_nval), _label) +#endif + +/* + * Force the pointer to u to match the size expected by the asm helper. + * clang/LLVM compiles all cases and only discards the unused paths after + * processing errors, which breaks i386 if the pointer is an 8-byte value. + */ +#define unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + bool __ret; \ + __chk_user_ptr(_ptr); \ + switch (sizeof(*(_ptr))) { \ + case 1: __ret = __try_cmpxchg_user_asm("b", "q", \ + (__force u8 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 2: __ret = __try_cmpxchg_user_asm("w", "r", \ + (__force u16 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 4: __ret = __try_cmpxchg_user_asm("l", "r", \ + (__force u32 *)(_ptr), (_oldp), \ + (_nval), _label); \ + break; \ + case 8: __ret = __try_cmpxchg64_user_asm((__force u64 *)(_ptr), (_oldp),\ + (_nval), _label); \ + break; \ + default: __try_cmpxchg_user_wrong_size(); \ + } \ + __ret; }) + +/* "Returns" 0 on success, 1 on failure, -EFAULT if the access faults. */ +#define __try_cmpxchg_user(_ptr, _oldp, _nval, _label) ({ \ + int __ret = -EFAULT; \ + __uaccess_begin_nospec(); \ + __ret = !unsafe_try_cmpxchg_user(_ptr, _oldp, _nval, _label); \ +_label: \ + __uaccess_end(); \ + __ret; \ + }) + /* * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. 
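The __try_cmpxchg_user() helpers added to uaccess.h above dispatch on operand size, report faults through the caller-supplied label, and on a value mismatch refresh *_oldp with the value actually observed, so a caller can retry without touching user memory again. The following is a minimal sketch of such a caller, assuming kernel context and the return convention documented in the comment above (0 on success, 1 on mismatch, -EFAULT on fault); the function itself is hypothetical and not part of this patch:

#include <linux/types.h>
#include <linux/uaccess.h>

/* Hypothetical example: atomically increment a 32-bit user-space counter. */
static int bump_user_counter(u32 __user *uval)
{
	u32 old;
	int ret;

	if (get_user(old, uval))
		return -EFAULT;

	do {
		/* On mismatch (ret == 1), 'old' now holds the observed value. */
		ret = __try_cmpxchg_user(uval, &old, old + 1, uaccess_fault);
	} while (ret == 1);

	return ret;	/* 0 on success, -EFAULT if the access faulted */
}

Compared with a separate read-modify-write using get_user()/put_user(), the compare-and-exchange closes the window in which the value could change between the read and the write, which is the usual motivation for a cmpxchg-based user access.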
diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec37..4e6a08d4c7e53 100644 --- a/arch/x86/include/asm/vermagic.h +++ b/arch/x86/include/asm/vermagic.h @@ -17,6 +17,48 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE_INTEL +#define MODULE_PROC_FAMILY "NATIVE_INTEL " +#elif defined CONFIG_MNATIVE_AMD +#define MODULE_PROC_FAMILY "NATIVE_AMD " +#elif defined CONFIG_MNEHALEM +#define MODULE_PROC_FAMILY "NEHALEM " +#elif defined CONFIG_MWESTMERE +#define MODULE_PROC_FAMILY "WESTMERE " +#elif defined CONFIG_MSILVERMONT +#define MODULE_PROC_FAMILY "SILVERMONT " +#elif defined CONFIG_MGOLDMONT +#define MODULE_PROC_FAMILY "GOLDMONT " +#elif defined CONFIG_MGOLDMONTPLUS +#define MODULE_PROC_FAMILY "GOLDMONTPLUS " +#elif defined CONFIG_MSANDYBRIDGE +#define MODULE_PROC_FAMILY "SANDYBRIDGE " +#elif defined CONFIG_MIVYBRIDGE +#define MODULE_PROC_FAMILY "IVYBRIDGE " +#elif defined CONFIG_MHASWELL +#define MODULE_PROC_FAMILY "HASWELL " +#elif defined CONFIG_MBROADWELL +#define MODULE_PROC_FAMILY "BROADWELL " +#elif defined CONFIG_MSKYLAKE +#define MODULE_PROC_FAMILY "SKYLAKE " +#elif defined CONFIG_MSKYLAKEX +#define MODULE_PROC_FAMILY "SKYLAKEX " +#elif defined CONFIG_MCANNONLAKE +#define MODULE_PROC_FAMILY "CANNONLAKE " +#elif defined CONFIG_MICELAKE +#define MODULE_PROC_FAMILY "ICELAKE " +#elif defined CONFIG_MCASCADELAKE +#define MODULE_PROC_FAMILY "CASCADELAKE " +#elif defined CONFIG_MCOOPERLAKE +#define MODULE_PROC_FAMILY "COOPERLAKE " +#elif defined CONFIG_MTIGERLAKE +#define MODULE_PROC_FAMILY "TIGERLAKE " +#elif defined CONFIG_MSAPPHIRERAPIDS +#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " +#elif defined CONFIG_MROCKETLAKE +#define MODULE_PROC_FAMILY "ROCKETLAKE " +#elif defined CONFIG_MALDERLAKE +#define MODULE_PROC_FAMILY "ALDERLAKE " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -35,6 +77,30 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MK8SSE3 +#define MODULE_PROC_FAMILY "K8SSE3 " +#elif defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " +#elif defined CONFIG_MEXCAVATOR +#define MODULE_PROC_FAMILY "EXCAVATOR " +#elif defined CONFIG_MZEN +#define MODULE_PROC_FAMILY "ZEN " +#elif defined CONFIG_MZEN2 +#define MODULE_PROC_FAMILY "ZEN2 " +#elif defined CONFIG_MZEN3 +#define MODULE_PROC_FAMILY "ZEN3 " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b70344bf66008..ed7d9cf71f68d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -170,7 +170,7 @@ static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; notsc_setup(NULL); - return 0; + return 1; } __setup("apicpmtimer", setup_apicpmtimer); #endif diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5a48e66e4f54..a6e9c2794ef56 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ 
b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -199,7 +199,13 @@ static void __init uv_tsc_check_sync(void) int mmr_shift; char *state; - /* Different returns from different UV BIOS versions */ + /* UV5 guarantees synced TSCs; do not zero TSC_ADJUST */ + if (!is_uv(UV2|UV3|UV4)) { + mark_tsc_async_resets("UV5+"); + return; + } + + /* UV2,3,4, UV BIOS TSC sync state available */ mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); mmr_shift = is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6296e1ebed1db..a8a9f64063315 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -41,8 +41,10 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); -static void __init mds_print_mitigation(void); +static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); @@ -85,6 +87,10 @@ EXPORT_SYMBOL_GPL(mds_idle_clear); */ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -117,17 +123,10 @@ void __init check_bugs(void) spectre_v2_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); + md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); - /* - * As MDS and TAA mitigations are inter-related, print MDS - * mitigation until after TAA mitigation selection is done. - */ - mds_print_mitigation(); - arch_smt_update(); #ifdef CONFIG_X86_32 @@ -267,14 +266,6 @@ static void __init mds_select_mitigation(void) } } -static void __init mds_print_mitigation(void) -{ - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) - return; - - pr_info("%s\n", mds_strings[mds_mitigation]); -} - static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) @@ -329,7 +320,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -343,7 +334,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) - goto out; + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -375,18 +366,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - - /* - * Update MDS mitigation, if necessary, as the mds_user_clear is - * now enabled for TAA mitigation. 
- */ - if (mds_mitigation == MDS_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MDS)) { - mds_mitigation = MDS_MITIGATION_FULL; - mds_select_mitigation(); - } -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -410,6 +389,151 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. 
+ */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); +} + +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt @@ -471,11 +595,13 @@ static void __init srbds_select_mitigation(void) return; /* - * Check to see if this is one of the MDS_NO systems supporting - * TSX that are only exposed to SRBDS when TSX is enabled. + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. */ ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; @@ -1109,6 +1235,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1120,14 +1248,17 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { @@ -1172,6 +1303,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1774,6 +1915,20 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) @@ -1874,6 +2029,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + default: break; } @@ -1925,4 +2083,9 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e342ae4db3c4d..af5d0c188f7b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1237,18 +1237,42 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { X86_FEATURE_ANY, issues) #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | + BIT(7) | BIT(0xB), MMIO), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), + 
VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), {} }; @@ -1269,6 +1293,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1322,12 +1353,27 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. 
+ */ + if (cpu_matches(cpu_vuln_blacklist, MMIO) && + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f7a5370a9b3b8..2c87d62f191e2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -91,7 +91,7 @@ static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) { ring3mwait_disabled = true; - return 0; + return 1; } __setup("ring3mwait=disable", ring3mwait_disable); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 1940d305db1c0..1c87501e0fa3d 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1294,10 +1294,23 @@ static void threshold_remove_bank(struct threshold_bank *bank) kfree(bank); } +static void __threshold_remove_device(struct threshold_bank **bp) +{ + unsigned int bank, numbanks = this_cpu_read(mce_num_banks); + + for (bank = 0; bank < numbanks; bank++) { + if (!bp[bank]) + continue; + + threshold_remove_bank(bp[bank]); + bp[bank] = NULL; + } + kfree(bp); +} + int mce_threshold_remove_device(unsigned int cpu) { struct threshold_bank **bp = this_cpu_read(threshold_banks); - unsigned int bank, numbanks = this_cpu_read(mce_num_banks); if (!bp) return 0; @@ -1308,13 +1321,7 @@ int mce_threshold_remove_device(unsigned int cpu) */ this_cpu_write(threshold_banks, NULL); - for (bank = 0; bank < numbanks; bank++) { - if (bp[bank]) { - threshold_remove_bank(bp[bank]); - bp[bank] = NULL; - } - } - kfree(bp); + __threshold_remove_device(bp); return 0; } @@ -1351,15 +1358,14 @@ int mce_threshold_create_device(unsigned int cpu) if (!(this_cpu_read(bank_map) & (1 << bank))) continue; err = threshold_create_bank(bp, cpu, bank); - if (err) - goto out_err; + if (err) { + __threshold_remove_device(bp); + return err; + } } this_cpu_write(threshold_banks, bp); if (thresholding_irq_en) mce_threshold_vector = amd_threshold_interrupt; return 0; -out_err: - mce_threshold_remove_device(cpu); - return err; } diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 981496e6bc0e4..fa67bb9d1afed 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -579,7 +579,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val, pfn = mce->addr >> PAGE_SHIFT; if (!memory_failure(pfn, 0)) { - set_mce_nospec(pfn, whole_page(mce)); + set_mce_nospec(pfn); mce->kflags |= MCE_HANDLED_UC; } @@ -1316,7 +1316,7 @@ static void kill_me_maybe(struct callback_head *cb) ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags); if (!ret) { - set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); + set_mce_nospec(p->mce_addr >> PAGE_SHIFT); sync_core(); return; } @@ -1342,7 +1342,7 @@ static void kill_me_never(struct callback_head *cb) p->mce_count = 0; pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr); if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0)) - set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page); + set_mce_nospec(p->mce_addr >> PAGE_SHIFT); } static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *)) diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 7c63a1911fae9..19876ebfb5044 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -12,6 +12,92 @@ #include "encls.h" #include "sgx.h" +#define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct 
sgx_pcmd)) +/* + * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to + * determine the page index associated with the first PCMD entry + * within a PCMD page. + */ +#define PCMD_FIRST_MASK GENMASK(4, 0) + +/** + * reclaimer_writing_to_pcmd() - Query if any enclave page associated with + * a PCMD page is in process of being reclaimed. + * @encl: Enclave to which PCMD page belongs + * @start_addr: Address of enclave page using first entry within the PCMD page + * + * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is + * stored. The PCMD data of a reclaimed enclave page contains enough + * information for the processor to verify the page at the time + * it is loaded back into the Enclave Page Cache (EPC). + * + * The backing storage to which enclave pages are reclaimed is laid out as + * follows: + * Encrypted enclave pages:SECS page:PCMD pages + * + * Each PCMD page contains the PCMD metadata of + * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages. + * + * A PCMD page can only be truncated if it is (a) empty, and (b) not in the + * process of getting data (and thus soon being non-empty). (b) is tested with + * a check if an enclave page sharing the PCMD page is in the process of being + * reclaimed. + * + * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it + * intends to reclaim that enclave page - it means that the PCMD page + * associated with that enclave page is about to get some data and thus + * even if the PCMD page is empty, it should not be truncated. + * + * Context: Enclave mutex (&sgx_encl->lock) must be held. + * Return: 1 if the reclaimer is about to write to the PCMD page + * 0 if the reclaimer has no intention to write to the PCMD page + */ +static int reclaimer_writing_to_pcmd(struct sgx_encl *encl, + unsigned long start_addr) +{ + int reclaimed = 0; + int i; + + /* + * PCMD_FIRST_MASK is based on number of PCMD entries within + * PCMD page being 32. + */ + BUILD_BUG_ON(PCMDS_PER_PAGE != 32); + + for (i = 0; i < PCMDS_PER_PAGE; i++) { + struct sgx_encl_page *entry; + unsigned long addr; + + addr = start_addr + i * PAGE_SIZE; + + /* + * Stop when reaching the SECS page - it does not + * have a page_array entry and its reclaim is + * started and completed with enclave mutex held so + * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED + * flag. + */ + if (addr == encl->base + encl->size) + break; + + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); + if (!entry) + continue; + + /* + * VA page slot ID uses same bit as the flag so it is important + * to ensure that the page is not already in backing store. + */ + if (entry->epc_page && + (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) { + reclaimed = 1; + break; + } + } + + return reclaimed; +} + /* * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's * follow right after the EPC data in the backing storage. In addition to the @@ -47,6 +133,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; struct sgx_encl *encl = encl_page->encl; pgoff_t page_index, page_pcmd_off; + unsigned long pcmd_first_page; struct sgx_pageinfo pginfo; struct sgx_backing b; bool pcmd_page_empty; @@ -58,9 +145,14 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, else page_index = PFN_DOWN(encl->size); + /* + * Address of enclave page using the first entry within the PCMD page. 
+ */ + pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base; + page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); - ret = sgx_encl_get_backing(encl, page_index, &b); + ret = sgx_encl_lookup_backing(encl, page_index, &b); if (ret) return ret; @@ -84,6 +176,7 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, } memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); + set_page_dirty(b.pcmd); /* * The area for the PCMD in the page was zeroed above. Check if the @@ -94,12 +187,20 @@ static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, kunmap_atomic(pcmd_page); kunmap_atomic((void *)(unsigned long)pginfo.contents); - sgx_encl_put_backing(&b, false); + get_page(b.pcmd); + sgx_encl_put_backing(&b); sgx_encl_truncate_backing_page(encl, page_index); - if (pcmd_page_empty) + if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) { sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); + pcmd_page = kmap_atomic(b.pcmd); + if (memchr_inv(pcmd_page, 0, PAGE_SIZE)) + pr_warn("PCMD page not empty after truncate.\n"); + kunmap_atomic(pcmd_page); + } + + put_page(b.pcmd); return ret; } @@ -617,7 +718,7 @@ static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, * 0 on success, * -errno otherwise. */ -int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, +static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, struct sgx_backing *backing) { pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); @@ -642,18 +743,113 @@ int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, return 0; } +/* + * When called from ksgxd, returns the mem_cgroup of a struct mm stored + * in the enclave's mm_list. When not called from ksgxd, just returns + * the mem_cgroup of the current task. + */ +static struct mem_cgroup *sgx_encl_get_mem_cgroup(struct sgx_encl *encl) +{ + struct mem_cgroup *memcg = NULL; + struct sgx_encl_mm *encl_mm; + int idx; + + /* + * If called from normal task context, return the mem_cgroup + * of the current task's mm. The remainder of the handling is for + * ksgxd. + */ + if (!current_is_ksgxd()) + return get_mem_cgroup_from_mm(current->mm); + + /* + * Search the enclave's mm_list to find an mm associated with + * this enclave to charge the allocation to. + */ + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { + if (!mmget_not_zero(encl_mm->mm)) + continue; + + memcg = get_mem_cgroup_from_mm(encl_mm->mm); + + mmput_async(encl_mm->mm); + + break; + } + + srcu_read_unlock(&encl->srcu, idx); + + /* + * In the rare case that there isn't an mm associated with + * the enclave, set memcg to the current active mem_cgroup. + * This will be the root mem_cgroup if there is no active + * mem_cgroup. + */ + if (!memcg) + return get_mem_cgroup_from_mm(NULL); + + return memcg; +} + /** - * sgx_encl_put_backing() - Unpin the backing storage + * sgx_encl_alloc_backing() - allocate a new backing storage page + * @encl: an enclave pointer + * @page_index: enclave page index * @backing: data for accessing backing storage for the page - * @do_write: mark pages dirty + * + * When called from ksgxd, sets the active memcg from one of the + * mms in the enclave's mm_list prior to any backing page allocation, + * in order to ensure that shmem page allocations are charged to the + * enclave. + * + * Return: + * 0 on success, + * -errno otherwise. 
*/ -void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write) +int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing) { - if (do_write) { - set_page_dirty(backing->pcmd); - set_page_dirty(backing->contents); - } + struct mem_cgroup *encl_memcg = sgx_encl_get_mem_cgroup(encl); + struct mem_cgroup *memcg = set_active_memcg(encl_memcg); + int ret; + + ret = sgx_encl_get_backing(encl, page_index, backing); + + set_active_memcg(memcg); + mem_cgroup_put(encl_memcg); + return ret; +} + +/** + * sgx_encl_lookup_backing() - retrieve an existing backing storage page + * @encl: an enclave pointer + * @page_index: enclave page index + * @backing: data for accessing backing storage for the page + * + * Retrieve a backing page for loading data back into an EPC page with ELDU. + * It is the caller's responsibility to ensure that it is appropriate to use + * sgx_encl_lookup_backing() rather than sgx_encl_alloc_backing(). If lookup is + * not used correctly, this will cause an allocation which is not accounted for. + * + * Return: + * 0 on success, + * -errno otherwise. + */ +int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing) +{ + return sgx_encl_get_backing(encl, page_index, backing); +} + +/** + * sgx_encl_put_backing() - Unpin the backing storage + * @backing: data for accessing backing storage for the page + */ +void sgx_encl_put_backing(struct sgx_backing *backing) +{ put_page(backing->pcmd); put_page(backing->contents); } diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h index fec43ca65065b..332ef3568267e 100644 --- a/arch/x86/kernel/cpu/sgx/encl.h +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -103,11 +103,14 @@ static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr, int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start, unsigned long end, unsigned long vm_flags); +bool current_is_ksgxd(void); void sgx_encl_release(struct kref *ref); int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); -int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, - struct sgx_backing *backing); -void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); +int sgx_encl_lookup_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing); +int sgx_encl_alloc_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing); +void sgx_encl_put_backing(struct sgx_backing *backing); int sgx_encl_test_and_clear_young(struct mm_struct *mm, struct sgx_encl_page *page); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index 8e4bc6453d263..a78652d43e61b 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -191,6 +191,8 @@ static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot, backing->pcmd_offset; ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot); + set_page_dirty(backing->pcmd); + set_page_dirty(backing->contents); kunmap_atomic((void *)(unsigned long)(pginfo.metadata - backing->pcmd_offset)); @@ -308,9 +310,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_ewb(epc_page, backing); encl_page->epc_page = NULL; encl->secs_child_cnt--; + sgx_encl_put_backing(backing); if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { - ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), + ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size), 
&secs_backing); if (ret) goto out; @@ -320,7 +323,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; - sgx_encl_put_backing(&secs_backing, true); + sgx_encl_put_backing(&secs_backing); } out: @@ -379,11 +382,14 @@ static void sgx_reclaim_pages(void) goto skip; page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); - ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); - if (ret) - goto skip; mutex_lock(&encl_page->encl->lock); + ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]); + if (ret) { + mutex_unlock(&encl_page->encl->lock); + goto skip; + } + encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; mutex_unlock(&encl_page->encl->lock); continue; @@ -411,7 +417,6 @@ static void sgx_reclaim_pages(void) encl_page = epc_page->owner; sgx_reclaimer_write(epc_page, &backing[i]); - sgx_encl_put_backing(&backing[i], true); kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; @@ -470,6 +475,11 @@ static bool __init sgx_page_reclaimer_init(void) return true; } +bool current_is_ksgxd(void) +{ + return current == ksgxd_tsk; +} + static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) { struct sgx_numa_node *node = &sgx_numa_nodes[nid]; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e28ab0ecc5378..0fdc807ae13f8 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -14,6 +14,8 @@ #include #include +#include + #include #include #include @@ -232,7 +234,20 @@ bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu) gfpu->fpstate = fpstate; gfpu->xfeatures = fpu_user_cfg.default_features; gfpu->perm = fpu_user_cfg.default_features; - gfpu->uabi_size = fpu_user_cfg.default_size; + + /* + * KVM sets the FP+SSE bits in the XSAVE header when copying FPU state + * to userspace, even when XSAVE is unsupported, so that restoring FPU + * state on a different CPU that does support XSAVE can cleanly load + * the incoming state using its natural XSAVE. In other words, KVM's + * uABI size may be larger than this host's default size. Conversely, + * the default size should never be larger than KVM's base uABI size; + * all features that can expand the uABI size must be opt-in. + */ + gfpu->uabi_size = sizeof(struct kvm_xsave); + if (WARN_ON_ONCE(fpu_user_cfg.default_size > gfpu->uabi_size)) + gfpu->uabi_size = fpu_user_cfg.default_size; + fpu_init_guest_permissions(gfpu); return true; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b1c45c9cda87..1a55bf700f926 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -191,7 +191,7 @@ void kvm_async_pf_task_wake(u32 token) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node *n; + struct kvm_task_sleep_node *n, *dummy = NULL; if (token == ~0) { apf_task_wake_all(); @@ -203,28 +203,41 @@ void kvm_async_pf_task_wake(u32 token) n = _find_apf_task(b, token); if (!n) { /* - * async PF was not yet handled. - * Add dummy entry for the token. + * Async #PF not yet handled, add a dummy entry for the token. + * Allocating the token must be down outside of the raw lock + * as the allocator is preemptible on PREEMPT_RT kernels. */ - n = kzalloc(sizeof(*n), GFP_ATOMIC); - if (!n) { + if (!dummy) { + raw_spin_unlock(&b->lock); + dummy = kzalloc(sizeof(*dummy), GFP_ATOMIC); + /* - * Allocation failed! 
Busy wait while other cpu - * handles async PF. + * Continue looping on allocation failure, eventually + * the async #PF will be handled and allocating a new + * node will be unnecessary. + */ + if (!dummy) + cpu_relax(); + + /* + * Recheck for async #PF completion before enqueueing + * the dummy token to avoid duplicate list entries. */ - raw_spin_unlock(&b->lock); - cpu_relax(); goto again; } - n->token = token; - n->cpu = smp_processor_id(); - init_swait_queue_head(&n->wq); - hlist_add_head(&n->link, &b->list); + dummy->token = token; + dummy->cpu = smp_processor_id(); + init_swait_queue_head(&dummy->wq); + hlist_add_head(&dummy->link, &b->list); + dummy = NULL; } else { apf_task_wake_one(n); } raw_spin_unlock(&b->lock); - return; + + /* A dummy token might be allocated and ultimately not used. */ + if (dummy) + kfree(dummy); } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 566bb8e171492..0611fd83858e6 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -376,9 +376,6 @@ void machine_kexec(struct kimage *image) #ifdef CONFIG_KEXEC_FILE void *arch_kexec_kernel_image_load(struct kimage *image) { - vfree(image->elf_headers); - image->elf_headers = NULL; - if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); @@ -514,6 +511,15 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, (int)ELF64_R_TYPE(rel[i].r_info), value); return -ENOEXEC; } + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} #endif /* CONFIG_KEXEC_FILE */ static int diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index b52407c56000e..879ef8c72f5c0 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -149,8 +149,10 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18); BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_flags) != 0x24); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_flags) != 0x18); CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 0f3c307b37b3a..8e2b2552b5eea 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -180,8 +180,7 @@ void set_task_blockstep(struct task_struct *task, bool on) * * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if * task is current or it can't be running, otherwise we can race - * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but - * PTRACE_KILL is not safe. + * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). 
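The kvm_async_pf_task_wake() rework above is an instance of a general PREEMPT_RT pattern: the allocator is preemptible on PREEMPT_RT kernels, so it must not be called under a raw_spinlock_t; instead the lock is dropped, the node allocated, and the state re-checked after re-acquiring the lock. A minimal sketch of that pattern follows; the structure, list and find_token() helper are illustrative, not kernel APIs.

struct token_node {
        struct hlist_node link;
        u32 token;
};

static void record_token(struct hlist_head *list, raw_spinlock_t *lock, u32 token)
{
        struct token_node *n = NULL;

again:
        raw_spin_lock(lock);
        if (!find_token(list, token)) {                 /* assumed lookup helper */
                if (!n) {
                        /*
                         * The allocator may be preempted on PREEMPT_RT, so
                         * drop the raw lock before calling it.
                         */
                        raw_spin_unlock(lock);
                        n = kzalloc(sizeof(*n), GFP_ATOMIC);
                        if (!n)
                                cpu_relax();
                        goto again;                     /* re-check with the lock held */
                }
                n->token = token;
                hlist_add_head(&n->link, list);
                n = NULL;                               /* ownership passed to the list */
        }
        raw_spin_unlock(lock);

        /* A node allocated on a lost race is simply discarded. */
        kfree(n);
}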
*/ local_irq_disable(); debugctl = get_debugctlmsr(); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 660b78827638f..8cc653ffdccd7 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -68,9 +68,6 @@ static int __init control_va_addr_alignment(char *str) if (*str == 0) return 1; - if (*str == '=') - str++; - if (!strcmp(str, "32")) va_align.flags = ALIGN_VA_32; else if (!strcmp(str, "64")) @@ -80,11 +77,11 @@ static int __init control_va_addr_alignment(char *str) else if (!strcmp(str, "on")) va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; else - return 0; + pr_warn("invalid option value: 'align_va_addr=%s'\n", str); return 1; } -__setup("align_va_addr", control_va_addr_alignment); +__setup("align_va_addr=", control_va_addr_alignment); SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 66b0eb0bda94e..6268880c8eed6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1548,6 +1548,7 @@ static void cancel_apic_timer(struct kvm_lapic *apic) if (apic->lapic_timer.hv_timer_in_use) cancel_hv_timer(apic); preempt_enable(); + atomic_set(&apic->lapic_timer.pending, 0); } static void apic_update_lvtt(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 45e1573f8f1d3..7f9c3b0fcd0b5 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1843,17 +1843,14 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \ if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else -static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, +static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { int ret = vcpu->arch.mmu->sync_page(vcpu, sp); - if (ret < 0) { + if (ret < 0) kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); - return false; - } - - return !!ret; + return ret; } static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, @@ -1975,7 +1972,7 @@ static int mmu_sync_children(struct kvm_vcpu *vcpu, for_each_sp(pages, sp, parents, i) { kvm_unlink_unsync_page(vcpu->kvm, sp); - flush |= kvm_sync_page(vcpu, sp, &invalid_list); + flush |= kvm_sync_page(vcpu, sp, &invalid_list) > 0; mmu_pages_clear_parents(&parents); } if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) { @@ -2016,6 +2013,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, struct hlist_head *sp_list; unsigned quadrant; struct kvm_mmu_page *sp; + int ret; int collisions = 0; LIST_HEAD(invalid_list); @@ -2068,11 +2066,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, * If the sync fails, the page is zapped. If so, break * in order to rebuild it. 
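The align_va_addr change above relies on a __setup() convention: when the registered string ends in '=', the option parser strips "name=" before invoking the handler, so the handler receives only the value and no longer needs to skip a leading '='; returning 1 marks the option as consumed. A minimal sketch with a hypothetical option name:

static bool my_opt_enabled __initdata;

static int __init my_opt_setup(char *str)
{
        /* str is "on" or "off", not "my_opt=on", because of the trailing '='. */
        if (!strcmp(str, "on"))
                my_opt_enabled = true;
        else if (strcmp(str, "off"))
                pr_warn("invalid option value: 'my_opt=%s'\n", str);

        return 1;       /* consume the option even when the value is bogus */
}
__setup("my_opt=", my_opt_setup);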
*/ - if (!kvm_sync_page(vcpu, sp, &invalid_list)) + ret = kvm_sync_page(vcpu, sp, &invalid_list); + if (ret < 0) break; WARN_ON(!list_empty(&invalid_list)); - kvm_flush_remote_tlbs(vcpu->kvm); + if (ret > 0) + kvm_flush_remote_tlbs(vcpu->kvm); } __clear_sp_write_flooding_count(sp); @@ -5168,7 +5168,7 @@ static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu) roots_to_free |= KVM_MMU_ROOT_CURRENT; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { - if (is_obsolete_root(kvm, mmu->root.hpa)) + if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa)) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 01fee5f67ac37..beb3ce8d94eb3 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -144,42 +144,6 @@ static bool FNAME(is_rsvd_bits_set)(struct kvm_mmu *mmu, u64 gpte, int level) FNAME(is_bad_mt_xwr)(&mmu->guest_rsvd_check, gpte); } -static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - pt_element_t __user *ptep_user, unsigned index, - pt_element_t orig_pte, pt_element_t new_pte) -{ - signed char r; - - if (!user_access_begin(ptep_user, sizeof(pt_element_t))) - return -EFAULT; - -#ifdef CMPXCHG - asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n" - "setnz %b[r]\n" - "2:" - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) - : [ptr] "+m" (*ptep_user), - [old] "+a" (orig_pte), - [r] "=q" (r) - : [new] "r" (new_pte) - : "memory"); -#else - asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n" - "setnz %b[r]\n" - "2:" - _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_EFAULT_REG, %k[r]) - : [ptr] "+m" (*ptep_user), - [old] "+A" (orig_pte), - [r] "=q" (r) - : [new_lo] "b" ((u32)new_pte), - [new_hi] "c" ((u32)(new_pte >> 32)) - : "memory"); -#endif - - user_access_end(); - return r; -} - static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) @@ -278,7 +242,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, if (unlikely(!walker->pte_writable[level - 1])) continue; - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); + ret = __try_cmpxchg_user(ptep_user, &orig_pte, pte, fault); if (ret) return ret; diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 96bab464967f2..9a5963bff0f95 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -819,9 +819,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) struct kvm_host_map map; int rc; - /* Triple faults in L2 should never escape. */ - WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); if (rc) { if (rc == -EINVAL) @@ -899,7 +896,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) { WARN_ON(!svm->tsc_scaling_enabled); vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; - svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); } svm->nested.ctl.nested_cr3 = 0; @@ -1296,7 +1293,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio, svm->tsc_ratio_msr); - svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); } /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. 
*/ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7c392873626fd..4b7d490c0b639 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -688,7 +688,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; - blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); + blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); if (!blob) return -ENOMEM; @@ -808,7 +808,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, if (!IS_ALIGNED(dst_paddr, 16) || !IS_ALIGNED(paddr, 16) || !IS_ALIGNED(size, 16)) { - tpage = (void *)alloc_page(GFP_KERNEL); + tpage = (void *)alloc_page(GFP_KERNEL | __GFP_ZERO); if (!tpage) return -ENOMEM; @@ -1094,7 +1094,7 @@ static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp) if (params.len > SEV_FW_BLOB_MAX_SIZE) return -EINVAL; - blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT); + blob = kzalloc(params.len, GFP_KERNEL_ACCOUNT); if (!blob) return -ENOMEM; @@ -1176,7 +1176,7 @@ static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp) return -EINVAL; /* allocate the memory to hold the session data blob */ - session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT); + session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT); if (!session_data) return -ENOMEM; @@ -1300,11 +1300,11 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) /* allocate memory for header and transport buffer */ ret = -ENOMEM; - hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); + hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT); if (!hdr) goto e_unpin; - trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT); + trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT); if (!trans_data) goto e_free_hdr; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7e45d03cd018a..0c0a09b43b105 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -463,11 +463,24 @@ static int has_svm(void) return 1; } +void __svm_write_tsc_multiplier(u64 multiplier) +{ + preempt_disable(); + + if (multiplier == __this_cpu_read(current_tsc_ratio)) + goto out; + + wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); + __this_cpu_write(current_tsc_ratio, multiplier); +out: + preempt_enable(); +} + static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (tsc_scaling) - wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT); + __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT); cpu_svm_disable(); @@ -513,8 +526,7 @@ static int svm_hardware_enable(void) * Set the default value, even if we don't use TSC scaling * to avoid having stale value in the msr */ - wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT); - __this_cpu_write(current_tsc_ratio, SVM_TSC_RATIO_DEFAULT); + __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT); } @@ -915,11 +927,12 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) +static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) { - wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); + __svm_write_tsc_multiplier(multiplier); } + /* Evaluate instruction intercepts that depend on guest CPUID features. 
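__svm_write_tsc_multiplier() above generalizes to a common pattern for expensive per-CPU MSRs: keep a per-CPU shadow of the last value written and skip the WRMSR when nothing changed, with preemption disabled so the shadow and the CPU stay paired. A hedged sketch of that pattern (the shadow variable and function name are illustrative):

static DEFINE_PER_CPU(u64, msr_shadow);

static void write_msr_cached(u32 msr, u64 val)
{
        /* The shadow is only meaningful while we cannot migrate CPUs. */
        preempt_disable();

        if (val != __this_cpu_read(msr_shadow)) {
                wrmsrl(msr, val);
                __this_cpu_write(msr_shadow, val);
        }

        preempt_enable();
}

Callers still need an unconditional write at bring-up so the shadow matches the hardware, which is the role the svm_hardware_enable() hunk above plays for MSR_AMD64_TSC_RATIO.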
*/ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, struct vcpu_svm *svm) @@ -1276,13 +1289,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) sev_es_prepare_switch_to_guest(hostsa); } - if (tsc_scaling) { - u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; - if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { - __this_cpu_write(current_tsc_ratio, tsc_ratio); - wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); - } - } + if (tsc_scaling) + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); if (likely(tsc_aux_uret_slot >= 0)) kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f76deff71002c..34babf9185fe5 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -558,7 +558,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); -void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier); +void __svm_write_tsc_multiplier(u64 multiplier); void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, struct vmcb_control_area *control); void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 856c875638833..ee7df31883cd9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3695,12 +3695,34 @@ vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) + struct vmcs12 *vmcs12, + u32 vm_exit_reason, u32 exit_intr_info) { u32 idt_vectoring; unsigned int nr; - if (vcpu->arch.exception.injected) { + /* + * Per the SDM, VM-Exits due to double and triple faults are never + * considered to occur during event delivery, even if the double/triple + * fault is the result of an escalating vectoring issue. + * + * Note, the SDM qualifies the double fault behavior with "The original + * event results in a double-fault exception". It's unclear why the + * qualification exists since exits due to double fault can occur only + * while vectoring a different exception (injected events are never + * subject to interception), i.e. there's _always_ an original event. + * + * The SDM also uses NMI as a confusing example for the "original event + * causes the VM exit directly" clause. NMI isn't special in any way, + * the same rule applies to all events that cause an exit directly. + * NMI is an odd choice for the example because NMIs can only occur on + * instruction boundaries, i.e. they _can't_ occur during vectoring. 
+ */ + if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT || + ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI && + is_double_fault(exit_intr_info))) { + vmcs12->idt_vectoring_info_field = 0; + } else if (vcpu->arch.exception.injected) { nr = vcpu->arch.exception.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -3733,6 +3755,8 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, idt_vectoring |= INTR_TYPE_EXT_INTR; vmcs12->idt_vectoring_info_field = idt_vectoring; + } else { + vmcs12->idt_vectoring_info_field = 0; } } @@ -4202,12 +4226,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (to_vmx(vcpu)->exit_reason.enclave_mode) vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE; vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = exit_intr_info; - - vmcs12->idt_vectoring_info_field = 0; - vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); - vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + /* + * On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched + * and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other + * exit info fields are unmodified. + */ if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) { vmcs12->launch_state = 1; @@ -4219,7 +4243,12 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Transfer the event that L0 or L1 may wanted to inject into * L2 to IDT_VECTORING_INFO_FIELD. */ - vmcs12_save_pending_event(vcpu, vmcs12); + vmcs12_save_pending_event(vcpu, vmcs12, + vm_exit_reason, exit_intr_info); + + vmcs12->vm_exit_intr_info = exit_intr_info; + vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); /* * According to spec, there's no need to store the guest's @@ -4518,9 +4547,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); - /* Similarly, triple faults in L2 should never escape. 
*/ - WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { /* * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index e325c290a8162..2b9d7a7e83f77 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -104,6 +104,11 @@ static inline bool is_breakpoint(u32 intr_info) return is_exception_n(intr_info, BP_VECTOR); } +static inline bool is_double_fault(u32 intr_info) +{ + return is_exception_n(intr_info, DF_VECTOR); +} + static inline bool is_page_fault(u32 intr_info) { return is_exception_n(intr_info, PF_VECTOR); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 610355b9ccceb..9646ae886b4b5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -229,6 +229,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -360,6 +363,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr |= FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. 
+ */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -2252,6 +2309,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4553,6 +4614,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); vpid_sync_context(vmx->vpid); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) @@ -6773,6 +6836,11 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); + + vmx_disable_fb_clear(vmx); if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); @@ -6782,6 +6850,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = native_read_cr2(); + vmx_enable_fb_clear(vmx); + guest_state_exit_irqoff(); } @@ -7856,7 +7926,7 @@ static unsigned int vmx_handle_intel_pt_intr(void) struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); /* '0' on failure so that the !PT case can use a RET0 static call. 
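The FB_CLEAR_DIS decision spread across vmx_setup_fb_clear_ctrl() and vmx_update_fb_clear_dis() above can be restated as a single predicate, which may make the condition easier to audit. This is a sketch for illustration only (want_fb_clear_dis() is not a KVM function); it only uses the ARCH_CAP_* bit names already present in the diff.

static bool want_fb_clear_dis(bool host_has_fb_clear_ctrl, u64 guest_arch_caps)
{
        /* MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] is not available to toggle. */
        if (!host_has_fb_clear_ctrl)
                return false;

        /* The guest is enumerated FB_CLEAR, so it may rely on VERW clearing
         * fill buffers; do not defeat that. */
        if (guest_arch_caps & ARCH_CAP_FB_CLEAR)
                return false;

        /* The guest is told it is not affected at all, so it has no reason to
         * execute VERW; toggling the MSR around VM-entry is pure overhead. */
        if ((guest_arch_caps & ARCH_CAP_MDS_NO) &&
            (guest_arch_caps & ARCH_CAP_TAA_NO) &&
            (guest_arch_caps & ARCH_CAP_PSDP_NO) &&
            (guest_arch_caps & ARCH_CAP_FBSDP_NO) &&
            (guest_arch_caps & ARCH_CAP_SBDR_SSDP_NO))
                return false;

        return true;
}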
*/ - if (!kvm_arch_pmi_in_guest(vcpu)) + if (!vcpu || !kvm_handling_nmi_from_guest(vcpu)) return 0; kvm_make_request(KVM_REQ_PMI, vcpu); @@ -8182,6 +8252,8 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index b98c7e96697a9..8d2342ede0c59 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -348,6 +348,8 @@ struct vcpu_vmx { u64 msr_ia32_feature_control_valid_bits; /* SGX Launch Control public key hash */ u64 msr_ia32_sgxlepubkeyhash[4]; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; struct lbr_desc lbr_desc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4790f0d7d40b8..558d1f2ab5b49 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1587,6 +1587,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } @@ -7229,15 +7232,8 @@ static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, exception, &write_emultor); } -#define CMPXCHG_TYPE(t, ptr, old, new) \ - (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) - -#ifdef CONFIG_X86_64 -# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) -#else -# define CMPXCHG64(ptr, old, new) \ - (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) -#endif +#define emulator_try_cmpxchg_user(t, ptr, old, new) \ + (__try_cmpxchg_user((t __user *)(ptr), (t *)(old), *(t *)(new), efault ## t)) static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned long addr, @@ -7246,12 +7242,11 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, unsigned int bytes, struct x86_exception *exception) { - struct kvm_host_map map; struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); u64 page_line_mask; + unsigned long hva; gpa_t gpa; - char *kaddr; - bool exchanged; + int r; /* guests cmpxchg8b have to be emulated atomically */ if (bytes > 8 || (bytes & (bytes - 1))) @@ -7275,31 +7270,32 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask)) goto emul_write; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map)) + hva = kvm_vcpu_gfn_to_hva(vcpu, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(hva)) goto emul_write; - kaddr = map.hva + offset_in_page(gpa); + hva += offset_in_page(gpa); switch (bytes) { case 1: - exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); + r = emulator_try_cmpxchg_user(u8, hva, old, new); break; case 2: - exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); + r = emulator_try_cmpxchg_user(u16, hva, old, new); break; case 4: - exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); + r = emulator_try_cmpxchg_user(u32, hva, old, new); break; case 8: - exchanged = CMPXCHG64(kaddr, old, new); + r = emulator_try_cmpxchg_user(u64, hva, old, new); break; default: BUG(); } - kvm_vcpu_unmap(vcpu, &map, true); - - if (!exchanged) + if (r < 0) + goto emul_write; + if (r) return X86EMUL_CMPXCHG_FAILED; kvm_page_track_write(vcpu, gpa, new, bytes); @@ -8251,7 +8247,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); -static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) +static bool kvm_vcpu_check_code_breakpoint(struct kvm_vcpu *vcpu, int *r) { if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) && 
(vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) { @@ -8320,25 +8316,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) } /* - * Decode to be emulated instruction. Return EMULATION_OK if success. + * Decode an instruction for emulation. The caller is responsible for handling + * code breakpoints. Note, manually detecting code breakpoints is unnecessary + * (and wrong) when emulating on an intercepted fault-like exception[*], as + * code breakpoints have higher priority and thus have already been done by + * hardware. + * + * [*] Except #MC, which is higher priority, but KVM should never emulate in + * response to a machine check. */ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type, void *insn, int insn_len) { - int r = EMULATION_OK; struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; + int r; init_emulate_ctxt(vcpu); - /* - * We will reenter on the same instruction since we do not set - * complete_userspace_io. This does not handle watchpoints yet, - * those would be handled in the emulate_ops. - */ - if (!(emulation_type & EMULTYPE_SKIP) && - kvm_vcpu_check_breakpoint(vcpu, &r)) - return r; - r = x86_decode_insn(ctxt, insn, insn_len, emulation_type); trace_kvm_emulate_insn_start(vcpu); @@ -8371,6 +8365,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, if (!(emulation_type & EMULTYPE_NO_DECODE)) { kvm_clear_exception_queue(vcpu); + /* + * Return immediately if RIP hits a code breakpoint, such #DBs + * are fault-like and are higher priority than any faults on + * the code fetch itself. + */ + if (!(emulation_type & EMULTYPE_SKIP) && + kvm_vcpu_check_code_breakpoint(vcpu, &r)) + return r; + r = x86_decode_emulated_instruction(vcpu, emulation_type, insn, insn_len); if (r != EMULATION_OK) { @@ -11747,20 +11750,15 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) vcpu_put(vcpu); } -static void kvm_free_vcpus(struct kvm *kvm) +static void kvm_unload_vcpu_mmus(struct kvm *kvm) { unsigned long i; struct kvm_vcpu *vcpu; - /* - * Unpin any mmu pages first. 
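The emulator_cmpxchg_emulated() conversion above leans on the return convention of __try_cmpxchg_user(): 0 means the exchange happened, a positive value means the comparison failed (and the old-value buffer now holds the observed contents), and a negative errno means the user access faulted via the label passed as the last argument. A hedged usage sketch, with an illustrative wrapper name and label:

static int replace_u64_at_user(u64 __user *uptr, u64 expected, u64 new_val)
{
        u64 old = expected;
        int ret;

        if (!access_ok(uptr, sizeof(*uptr)))
                return -EFAULT;

        ret = __try_cmpxchg_user(uptr, &old, new_val, cmpxchg_fault);
        if (ret < 0)
                return ret;             /* faulted on the user address */
        if (ret)
                return -EBUSY;          /* comparison failed; 'old' holds the observed value */

        return 0;                       /* exchanged */
}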
- */ kvm_for_each_vcpu(i, vcpu, kvm) { kvm_clear_async_pf_completion_queue(vcpu); kvm_unload_vcpu_mmu(vcpu); } - - kvm_destroy_vcpus(kvm); } void kvm_arch_sync_events(struct kvm *kvm) @@ -11866,11 +11864,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm) __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); mutex_unlock(&kvm->slots_lock); } + kvm_unload_vcpu_mmus(kvm); static_call_cond(kvm_x86_vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); - kvm_free_vcpus(kvm); + kvm_destroy_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 65d15df6212d6..0e65d00e2339f 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -54,8 +54,8 @@ static void delay_loop(u64 __loops) " jnz 2b \n" "3: dec %0 \n" - : /* we don't need output */ - :"a" (loops) + : "+a" (loops) + : ); } diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 4ba2a3ee4bce1..d5ef64ddd35e9 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -101,7 +101,7 @@ int pat_debug_enable; static int __init pat_debug_setup(char *str) { pat_debug_enable = 1; - return 0; + return 1; } __setup("debugpat", pat_debug_setup); diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 0656db33574d3..1abd5438f1269 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include #include @@ -1805,7 +1805,7 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, } /* - * _set_memory_prot is an internal helper for callers that have been passed + * __set_memory_prot is an internal helper for callers that have been passed * a pgprot_t value from upper layers and a reservation has already been taken. * If you want to set the pgprot to a specific page protocol, use the * set_memory_xx() functions. @@ -1914,6 +1914,51 @@ int set_memory_wb(unsigned long addr, int numpages) } EXPORT_SYMBOL(set_memory_wb); +/* Prevent speculative access to a page by marking it not-present */ +#ifdef CONFIG_X86_64 +int set_mce_nospec(unsigned long pfn) +{ + unsigned long decoy_addr; + int rc; + + /* SGX pages are not in the 1:1 map */ + if (arch_is_platform_page(pfn << PAGE_SHIFT)) + return 0; + /* + * We would like to just call: + * set_memory_XX((unsigned long)pfn_to_kaddr(pfn), 1); + * but doing that would radically increase the odds of a + * speculative access to the poison page because we'd have + * the virtual address of the kernel 1:1 mapping sitting + * around in registers. + * Instead we get tricky. We create a non-canonical address + * that looks just like the one we want, but has bit 63 flipped. + * This relies on set_memory_XX() properly sanitizing any __pa() + * results with __PHYSICAL_MASK or PTE_PFN_MASK. + */ + decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); + + rc = set_memory_np(decoy_addr, 1); + if (rc) + pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); + return rc; +} + +static int set_memory_present(unsigned long *addr, int numpages) +{ + return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); +} + +/* Restore full speculative operation to the pfn. 
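The set_mce_nospec() comment above describes the decoy-address trick: flip bit 63 of the direct-map alias so the canonical virtual address of the poisoned page never sits in a register, while the page-attribute code still resolves the same pfn after sanitizing with the physical-address mask. A stand-alone arithmetic sketch (the direct-map base and pfn are assumptions, not values from a running kernel):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT  12
#define PAGE_OFFSET 0xffff888000000000ULL    /* assumed 4-level x86_64 direct-map base */

int main(void)
{
        uint64_t pfn   = 0x123456;           /* hypothetical poisoned pfn */
        uint64_t real  = (pfn << PAGE_SHIFT) + PAGE_OFFSET;
        uint64_t decoy = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ (1ULL << 63));

        /* Both addresses name the same pfn once __pa() results are masked with
         * __PHYSICAL_MASK; only the decoy is ever handed to set_memory_np(). */
        printf("canonical 1:1 address: %#llx\n", (unsigned long long)real);
        printf("decoy (bit 63 flipped): %#llx\n", (unsigned long long)decoy);
        return 0;
}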
*/ +int clear_mce_nospec(unsigned long pfn) +{ + unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); + + return set_memory_present(&addr, 1); +} +EXPORT_SYMBOL_GPL(clear_mce_nospec); +#endif /* CONFIG_X86_64 */ + int set_memory_x(unsigned long addr, int numpages) { if (!(__supported_pte_mask & _PAGE_NX)) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3481b35cb4ec7..a224193d84bff 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { @@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 97b63e35e1528..21c4bc41741fe 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -253,6 +253,15 @@ static void write_pc_conf_nybble(u8 base, u8 index, u8 val) pc_conf_set(reg, x); } +/* + * FinALi pirq rules are as follows: + * + * - bit 0 selects between INTx Routing Table Mapping Registers, + * + * - bit 3 selects the nibble within the INTx Routing Table Mapping Register, + * + * - bits 7:4 map to bits 3:0 of the PCI INTx Sensitivity Register. + */ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { @@ -260,11 +269,13 @@ static int pirq_finali_get(struct pci_dev *router, struct pci_dev *dev, 0, 9, 3, 10, 4, 5, 7, 6, 0, 11, 0, 12, 0, 14, 0, 15 }; unsigned long flags; + u8 index; u8 x; + index = (pirq & 1) << 1 | (pirq & 8) >> 3; raw_spin_lock_irqsave(&pc_conf_lock, flags); pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY); - x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1)]; + x = irqmap[read_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index)]; pc_conf_set(PC_CONF_FINALI_LOCK, 0); raw_spin_unlock_irqrestore(&pc_conf_lock, flags); return x; @@ -278,13 +289,15 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev, }; u8 val = irqmap[irq]; unsigned long flags; + u8 index; if (!val) return 0; + index = (pirq & 1) << 1 | (pirq & 8) >> 3; raw_spin_lock_irqsave(&pc_conf_lock, flags); pc_conf_set(PC_CONF_FINALI_LOCK, PC_CONF_FINALI_LOCK_KEY); - write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, pirq - 1, val); + write_pc_conf_nybble(PC_CONF_FINALI_PCI_INTX_RT1, index, val); pc_conf_set(PC_CONF_FINALI_LOCK, 0); raw_spin_unlock_irqrestore(&pc_conf_lock, flags); return 1; @@ -293,7 +306,7 @@ static int pirq_finali_set(struct pci_dev *router, struct pci_dev *dev, static int pirq_finali_lvl(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - u8 mask = ~(1u << (pirq - 1)); + u8 mask = ~((pirq & 0xf0u) >> 4); unsigned long flags; u8 trig; diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 3ee234b6234dd..255a44dd415a9 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -23,9 +23,11 @@ static long write_ldt_entry(struct mm_id *mm_idp, int func, { long res; void *stub_addr; + + BUILD_BUG_ON(sizeof(*desc) % sizeof(long)); + res = syscall_stub_data(mm_idp, (unsigned long *)desc, - (sizeof(*desc) + sizeof(long) - 1) & - ~(sizeof(long) - 1), + sizeof(*desc) / sizeof(long), addr, &stub_addr); if (!res) { unsigned long args[] = { func, diff --git 
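The FinALi bit rules documented in the arch/x86/pci/irq.c hunks above reduce to two small bit manipulations; a worked stand-alone example with a hypothetical pirq value may make the new index and mask computations easier to follow.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint8_t pirq = 0xc9;    /* hypothetical: bit 0 set, bit 3 set, bits 7:4 = 0xc */

        /* Bit 0 picks the mapping register, bit 3 picks the nibble within it. */
        uint8_t index = (pirq & 1) << 1 | (pirq & 8) >> 3;

        /* Bits 7:4 map to bits 3:0 of the PCI INTx Sensitivity Register. */
        uint8_t level_mask = (uint8_t)~((pirq & 0xf0u) >> 4);

        /* Prints: pirq 0xc9 -> nybble index 3, sensitivity mask 0xf3 */
        printf("pirq %#x -> nybble index %u, sensitivity mask %#x\n",
               pirq, index, level_mask);
        return 0;
}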
a/arch/xtensa/include/asm/timex.h b/arch/xtensa/include/asm/timex.h index 233ec75e60c69..3f2462f2d0270 100644 --- a/arch/xtensa/include/asm/timex.h +++ b/arch/xtensa/include/asm/timex.h @@ -29,10 +29,6 @@ extern unsigned long ccount_freq; -typedef unsigned long long cycles_t; - -#define get_cycles() (0) - void local_timer_setup(unsigned cpu); /* @@ -59,4 +55,6 @@ static inline void set_linux_timer (unsigned long ccompare) xtensa_set_sr(ccompare, SREG_CCOMPARE + LINUX_TIMER); } +#include + #endif /* _XTENSA_TIMEX_H */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 6b6eff658795c..07d683d94e175 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -442,7 +442,6 @@ KABI_W or a3, a3, a0 moveqz a3, a0, a2 # a3 = LOCKLEVEL iff interrupt KABI_W movi a2, PS_WOE_MASK KABI_W or a3, a3, a2 - rsr a2, exccause #endif /* restore return address (or 0 if return to userspace) */ @@ -469,19 +468,27 @@ KABI_W or a3, a3, a2 save_xtregs_opt a1 a3 a4 a5 a6 a7 PT_XTREGS_OPT +#ifdef CONFIG_TRACE_IRQFLAGS + rsr abi_tmp0, ps + extui abi_tmp0, abi_tmp0, PS_INTLEVEL_SHIFT, PS_INTLEVEL_WIDTH + beqz abi_tmp0, 1f + abi_call trace_hardirqs_off +1: +#endif + /* Go to second-level dispatcher. Set up parameters to pass to the * exception handler and call the exception handler. */ - rsr a4, excsave1 - addx4 a4, a2, a4 - l32i a4, a4, EXC_TABLE_DEFAULT # load handler - mov abi_arg1, a2 # pass EXCCAUSE + l32i abi_arg1, a1, PT_EXCCAUSE # pass EXCCAUSE + rsr abi_tmp0, excsave1 + addx4 abi_tmp0, abi_arg1, abi_tmp0 + l32i abi_tmp0, abi_tmp0, EXC_TABLE_DEFAULT # load handler mov abi_arg0, a1 # pass stack frame /* Call the second-level handler */ - abi_callx a4 + abi_callx abi_tmp0 /* Jump here for exception exit */ .global common_exception_return diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 323c678a691ff..b952e67cc0ccd 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -225,12 +225,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) void user_enable_single_step(struct task_struct *child) { - child->ptrace |= PT_SINGLESTEP; + set_tsk_thread_flag(child, TIF_SINGLESTEP); } void user_disable_single_step(struct task_struct *child) { - child->ptrace &= ~PT_SINGLESTEP; + clear_tsk_thread_flag(child, TIF_SINGLESTEP); } /* diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 6f68649e86ba5..ac50ec46c8f14 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -473,7 +473,7 @@ static void do_signal(struct pt_regs *regs) /* Set up the stack frame */ ret = setup_frame(&ksig, sigmask_to_save(), regs); signal_setup_done(ret, &ksig, 0); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; @@ -499,7 +499,7 @@ static void do_signal(struct pt_regs *regs) /* If there's no signal to deliver, we just restore the saved mask. 
*/ restore_saved_sigmask(); - if (current->ptrace & PT_SINGLESTEP) + if (test_thread_flag(TIF_SINGLESTEP)) task_pt_regs(current)->icountlevel = 1; return; } diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 52c94ab5c2058..1518e261d882e 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -421,3 +421,4 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common pmadv_ksm sys_pmadv_ksm diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 9345007d474d3..5f86208c67c87 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -242,12 +242,8 @@ DEFINE_PER_CPU(unsigned long, nmi_count); void do_nmi(struct pt_regs *regs) { - struct pt_regs *old_regs; + struct pt_regs *old_regs = set_irq_regs(regs); - if ((regs->ps & PS_INTLEVEL_MASK) < LOCKLEVEL) - trace_hardirqs_off(); - - old_regs = set_irq_regs(regs); nmi_enter(); ++*this_cpu_ptr(&nmi_count); check_valid_nmi(); @@ -269,12 +265,9 @@ void do_interrupt(struct pt_regs *regs) XCHAL_INTLEVEL6_MASK, XCHAL_INTLEVEL7_MASK, }; - struct pt_regs *old_regs; + struct pt_regs *old_regs = set_irq_regs(regs); unsigned unhandled = ~0u; - trace_hardirqs_off(); - - old_regs = set_irq_regs(regs); irq_enter(); for (;;) { diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 0f0e0724397f4..4255b92fa3eb0 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -211,12 +211,18 @@ static ssize_t proc_read_simdisk(struct file *file, char __user *buf, struct simdisk *dev = pde_data(file_inode(file)); const char *s = dev->filename; if (s) { - ssize_t n = simple_read_from_buffer(buf, size, ppos, - s, strlen(s)); - if (n < 0) - return n; - buf += n; - size -= n; + ssize_t len = strlen(s); + char *temp = kmalloc(len + 2, GFP_KERNEL); + + if (!temp) + return -ENOMEM; + + len = scnprintf(temp, len + 2, "%s\n", s); + len = simple_read_from_buffer(buf, size, ppos, + temp, len); + + kfree(temp); + return len; } return simple_read_from_buffer(buf, size, ppos, "\n", 1); } diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 420eda2589c0e..09574af835662 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -557,6 +557,7 @@ static void bfq_pd_init(struct blkg_policy_data *pd) */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; + bfqg->online = true; bfqg->rq_pos_tree = RB_ROOT; } @@ -585,28 +586,11 @@ static void bfq_group_set_parent(struct bfq_group *bfqg, entity->sched_data = &parent->sched_data; } -static struct bfq_group *bfq_lookup_bfqg(struct bfq_data *bfqd, - struct blkcg *blkcg) +static void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { - struct blkcg_gq *blkg; - - blkg = blkg_lookup(blkcg, bfqd->queue); - if (likely(blkg)) - return blkg_to_bfqg(blkg); - return NULL; -} - -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg) -{ - struct bfq_group *bfqg, *parent; + struct bfq_group *parent; struct bfq_entity *entity; - bfqg = bfq_lookup_bfqg(bfqd, blkcg); - - if (unlikely(!bfqg)) - return NULL; - /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet @@ -623,8 +607,24 @@ struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, bfq_group_set_parent(curr_bfqg, parent); } } +} - return bfqg; +struct bfq_group 
*bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) +{ + struct blkcg_gq *blkg = bio->bi_blkg; + struct bfq_group *bfqg; + + while (blkg) { + bfqg = blkg_to_bfqg(blkg); + if (bfqg->online) { + bio_associate_blkg_from_css(bio, &blkg->blkcg->css); + return bfqg; + } + blkg = blkg->parent; + } + bio_associate_blkg_from_css(bio, + &bfqg_to_blkg(bfqd->root_group)->blkcg->css); + return bfqd->root_group; } /** @@ -714,25 +714,15 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) - * - * NOTE: an alternative approach might have been to store the current - * cgroup in bfqq and getting a reference to it, reducing the lookup - * time here, at the price of slightly more complex code. */ -static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, - struct bfq_io_cq *bic, - struct blkcg *blkcg) +static void *__bfq_bic_change_cgroup(struct bfq_data *bfqd, + struct bfq_io_cq *bic, + struct bfq_group *bfqg) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, 0); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, 1); - struct bfq_group *bfqg; struct bfq_entity *entity; - bfqg = bfq_find_set_group(bfqd, blkcg); - - if (unlikely(!bfqg)) - bfqg = bfqd->root_group; - if (async_bfqq) { entity = &async_bfqq->entity; @@ -743,9 +733,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, } if (sync_bfqq) { - entity = &sync_bfqq->entity; - if (entity->sched_data != &bfqg->sched_data) - bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { + /* We are the only user of this bfqq, just move it */ + if (sync_bfqq->entity.sched_data != &bfqg->sched_data) + bfq_bfqq_move(bfqd, sync_bfqq, bfqg); + } else { + struct bfq_queue *bfqq; + + /* + * The queue was merged to a different queue. Check + * that the merge chain still belongs to the same + * cgroup. + */ + for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) + if (bfqq->entity.sched_data != + &bfqg->sched_data) + break; + if (bfqq) { + /* + * Some queue changed cgroup so the merge is + * not valid anymore. We cannot easily just + * cancel the merge (by clearing new_bfqq) as + * there may be other processes using this + * queue and holding refs to all queues below + * sync_bfqq->new_bfqq. Similarly if the merge + * already happened, we need to detach from + * bfqq now so that we cannot merge bio to a + * request from the old cgroup. + */ + bfq_put_cooperator(sync_bfqq); + bfq_release_process_ref(bfqd, sync_bfqq); + bic_set_bfqq(bic, NULL, 1); + } + } } return bfqg; @@ -754,20 +774,24 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd, void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); - struct bfq_group *bfqg = NULL; + struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; - rcu_read_lock(); - serial_nr = __bio_blkcg(bio)->css.serial_nr; + serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) - goto out; + return; - bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); + /* + * New cgroup for this process. Make sure it is linked to bfq internal + * cgroup hierarchy. 
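For reference, the lookup rule bfq_bio_bfqg() introduces above can be read as a small stand-alone helper: climb from bio->bi_blkg towards the root and take the first group that is still online, falling back to the root group. This sketch omits the bio re-association done by the real function and uses an illustrative name.

static struct bfq_group *closest_online_bfqg(struct bfq_data *bfqd, struct bio *bio)
{
        struct blkcg_gq *blkg;

        for (blkg = bio->bi_blkg; blkg; blkg = blkg->parent) {
                struct bfq_group *bfqg = blkg_to_bfqg(blkg);

                /* Offline groups are being torn down; do not queue new I/O there. */
                if (bfqg->online)
                        return bfqg;
        }

        return bfqd->root_group;
}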
+ */ + bfq_link_bfqg(bfqd, bfqg); + __bfq_bic_change_cgroup(bfqd, bic, bfqg); /* * Update blkg_path for bfq_log_* functions. We cache this * path, and update it here, for the following @@ -820,8 +844,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) */ blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path)); bic->blkcg_serial_nr = serial_nr; -out: - rcu_read_unlock(); } /** @@ -949,6 +971,7 @@ static void bfq_pd_offline(struct blkg_policy_data *pd) put_async_queues: bfq_put_async_queues(bfqd, bfqg); + bfqg->online = false; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -1438,7 +1461,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd) bfq_end_wr_async_queues(bfqd, bfqd->root_group); } -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, struct blkcg *blkcg) +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 1f62dbdc521ff..bf5acd8f43229 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2133,9 +2133,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (!bfqd->last_completed_rq_bfqq || bfqd->last_completed_rq_bfqq == bfqq || bfq_bfqq_has_short_ttime(bfqq) || - bfqq->dispatched > 0 || - now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC || - bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq) + now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC) return; /* @@ -2210,7 +2208,7 @@ static void bfq_add_request(struct request *rq) bfqq->queued[rq_is_sync(rq)]++; bfqd->queued++; - if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { + if (bfq_bfqq_sync(bfqq) && RQ_BIC(rq)->requests <= 1) { bfq_check_waker(bfqd, bfqq, now_ns); /* @@ -2463,10 +2461,17 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, spin_lock_irq(&bfqd->lock); - if (bic) + if (bic) { + /* + * Make sure cgroup info is uptodate for current process before + * considering the merge. + */ + bfq_bic_update_cgroup(bic, bio); + bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf)); - else + } else { bfqd->bio_bfqq = NULL; + } bfqd->bio_bic = bic; ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); @@ -2496,8 +2501,6 @@ static int bfq_request_merge(struct request_queue *q, struct request **req, return ELEVATOR_NO_MERGE; } -static struct bfq_queue *bfq_init_rq(struct request *rq); - static void bfq_request_merged(struct request_queue *q, struct request *req, enum elv_merge type) { @@ -2506,7 +2509,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, blk_rq_pos(req) < blk_rq_pos(container_of(rb_prev(&req->rb_node), struct request, rb_node))) { - struct bfq_queue *bfqq = bfq_init_rq(req); + struct bfq_queue *bfqq = RQ_BFQQ(req); struct bfq_data *bfqd; struct request *prev, *next_rq; @@ -2558,8 +2561,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req, static void bfq_requests_merged(struct request_queue *q, struct request *rq, struct request *next) { - struct bfq_queue *bfqq = bfq_init_rq(rq), - *next_bfqq = bfq_init_rq(next); + struct bfq_queue *bfqq = RQ_BFQQ(rq), + *next_bfqq = RQ_BFQQ(next); if (!bfqq) goto remove; @@ -2764,6 +2767,14 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) if (process_refs == 0 || new_process_refs == 0) return NULL; + /* + * Make sure merged queues belong to the same parent. Parents could + * have changed since the time we decided the two queues are suitable + * for merging. 
+ */ + if (new_bfqq->entity.parent != bfqq->entity.parent) + return NULL; + bfq_log_bfqq(bfqq->bfqd, bfqq, "scheduling merge with queue %d", new_bfqq->pid); @@ -2901,9 +2912,12 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_queue *new_bfqq = bfq_setup_merge(bfqq, stable_merge_bfqq); - bic->stably_merged = true; - if (new_bfqq && new_bfqq->bic) - new_bfqq->bic->stably_merged = true; + if (new_bfqq) { + bic->stably_merged = true; + if (new_bfqq->bic) + new_bfqq->bic->stably_merged = + true; + } return new_bfqq; } else return NULL; @@ -5310,7 +5324,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq) bfq_put_queue(bfqq); } -static void bfq_put_cooperator(struct bfq_queue *bfqq) +void bfq_put_cooperator(struct bfq_queue *bfqq) { struct bfq_queue *__bfqq, *next; @@ -5716,14 +5730,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bfq_queue *bfqq; struct bfq_group *bfqg; - rcu_read_lock(); - - bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); - if (!bfqg) { - bfqq = &bfqd->oom_bfqq; - goto out; - } - + bfqg = bfq_bio_bfqg(bfqd, bio); if (!is_sync) { async_bfqq = bfq_async_queue_prio(bfqd, bfqg, ioprio_class, ioprio); @@ -5769,8 +5776,6 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn) bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic); - - rcu_read_unlock(); return bfqq; } @@ -6117,6 +6122,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q, unsigned int cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ +static struct bfq_queue *bfq_init_rq(struct request *rq); + static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bool at_head) { @@ -6132,18 +6139,15 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, bfqg_stats_update_legacy_io(q, rq); #endif spin_lock_irq(&bfqd->lock); + bfqq = bfq_init_rq(rq); if (blk_mq_sched_try_insert_merge(q, rq, &free)) { spin_unlock_irq(&bfqd->lock); blk_mq_free_requests(&free); return; } - spin_unlock_irq(&bfqd->lock); - trace_block_rq_insert(rq); - spin_lock_irq(&bfqd->lock); - bfqq = bfq_init_rq(rq); if (!bfqq || at_head) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); @@ -6563,6 +6567,7 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); } bfq_finish_requeue_request_body(bfqq); + RQ_BIC(rq)->requests--; spin_unlock_irqrestore(&bfqd->lock, flags); /* @@ -6796,6 +6801,7 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfqq_request_allocated(bfqq); bfqq->ref++; + bic->requests++; bfq_log_bfqq(bfqd, bfqq, "get_request %p: bfqq %p, %d", rq, bfqq, bfqq->ref); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 3b83e3d1c2e58..9af8599ab90ff 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -468,6 +468,7 @@ struct bfq_io_cq { struct bfq_queue *stable_merge_bfqq; bool stably_merged; /* non splittable if true */ + unsigned int requests; /* Number of requests this process has in flight */ }; /** @@ -928,6 +929,8 @@ struct bfq_group { /* reference counter (see comments in bfq_bic_update_cgroup) */ int ref; + /* Is bfq_group still online? 
*/ + bool online; struct bfq_entity entity; struct bfq_sched_data sched_data; @@ -979,6 +982,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd, void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, bool compensate, enum bfqq_expiration reason); void bfq_put_queue(struct bfq_queue *bfqq); +void bfq_put_cooperator(struct bfq_queue *bfqq); void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq); void bfq_schedule_dispatch(struct bfq_data *bfqd); @@ -1006,8 +1010,7 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg); void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio); void bfq_end_wr_async(struct bfq_data *bfqd); -struct bfq_group *bfq_find_set_group(struct bfq_data *bfqd, - struct blkcg *blkcg); +struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio); struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg); struct bfq_group *bfqq_group(struct bfq_queue *bfqq); struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node); diff --git a/block/bio.c b/block/bio.c index 4259125e16ab2..d3ca79c3ebdff 100644 --- a/block/bio.c +++ b/block/bio.c @@ -693,6 +693,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs) bio_alloc_cache_prune(cache, -1U); } free_percpu(bs->cache); + bs->cache = NULL; } /** @@ -1336,10 +1337,12 @@ void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio_vec src_bv = bio_iter_iovec(src, *src_iter); struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter); unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len); - void *src_buf; + void *src_buf = bvec_kmap_local(&src_bv); + void *dst_buf = bvec_kmap_local(&dst_bv); - src_buf = bvec_kmap_local(&src_bv); - memcpy_to_bvec(&dst_bv, src_buf); + memcpy(dst_buf, src_buf, bytes); + + kunmap_local(dst_buf); kunmap_local(src_buf); bio_advance_iter_single(src, src_iter, bytes); diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8dfe62786cd5f..1c52752abb084 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -905,7 +905,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) { struct blkg_iostat_set *bis = &blkg->iostat; u64 rbytes, wbytes, rios, wios, dbytes, dios; - bool has_stats = false; const char *dname; unsigned seq; int i; @@ -931,14 +930,12 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) } while (u64_stats_fetch_retry(&bis->sync, seq)); if (rbytes || wbytes || rios || wios) { - has_stats = true; seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu", rbytes, wbytes, rios, wios, dbytes, dios); } if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) { - has_stats = true; seq_printf(s, " use_delay=%d delay_nsec=%llu", atomic_read(&blkg->use_delay), atomic64_read(&blkg->delay_nsec)); @@ -950,12 +947,10 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s) if (!blkg->pd[i] || !pol->pd_stat_fn) continue; - if (pol->pd_stat_fn(blkg->pd[i], s)) - has_stats = true; + pol->pd_stat_fn(blkg->pd[i], s); } - if (has_stats) - seq_printf(s, "\n"); + seq_puts(s, "\n"); } static int blkcg_print_stat(struct seq_file *sf, void *v) @@ -1906,12 +1901,8 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg); */ void bio_clone_blkg_association(struct bio *dst, struct bio *src) { - if (src->bi_blkg) { - if (dst->bi_blkg) - blkg_put(dst->bi_blkg); - blkg_get(src->bi_blkg); 
- dst->bi_blkg = src->bi_blkg; - } + if (src->bi_blkg) + bio_associate_blkg_from_css(dst, &bio_blkcg(src)->css); } EXPORT_SYMBOL_GPL(bio_clone_blkg_association); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 47e1e38390c96..b56ba16fb6c57 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -63,7 +63,7 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, +typedef void (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, struct seq_file *s); struct blkcg_policy { diff --git a/block/blk-core.c b/block/blk-core.c index bc05067721523..84f7b7884d072 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -948,7 +948,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags) blk_flush_plug(current->plug, false); - if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT)) + if (bio_queue_enter(bio)) return 0; if (queue_is_mq(q)) { ret = blk_mq_poll(q, cookie, iob, flags); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index 18c68d8b9138e..56ed48d2954e6 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -54,13 +54,8 @@ static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj, container_of(attr, struct blk_ia_range_sysfs_entry, attr); struct blk_independent_access_range *iar = container_of(kobj, struct blk_independent_access_range, kobj); - ssize_t ret; - mutex_lock(&iar->queue->sysfs_lock); - ret = entry->show(iar, buf); - mutex_unlock(&iar->queue->sysfs_lock); - - return ret; + return entry->show(iar, buf); } static const struct sysfs_ops blk_ia_range_sysfs_ops = { diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9bd670999d0af..16705fbd06991 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3005,13 +3005,13 @@ static void ioc_pd_free(struct blkg_policy_data *pd) kfree(iocg); } -static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) +static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct ioc_gq *iocg = pd_to_iocg(pd); struct ioc *ioc = iocg->ioc; if (!ioc->enabled) - return false; + return; if (iocg->level == 0) { unsigned vp10k = DIV64_U64_ROUND_CLOSEST( @@ -3027,7 +3027,6 @@ static bool ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) iocg->last_stat.wait_us, iocg->last_stat.indebt_us, iocg->last_stat.indelay_us); - return true; } static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd, diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2f33932e72e36..9568bf8dfe82b 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -87,7 +87,17 @@ struct iolatency_grp; struct blk_iolatency { struct rq_qos rqos; struct timer_list timer; - atomic_t enabled; + + /* + * ->enabled is the master enable switch gating the throttling logic and + * inflight tracking. The number of cgroups which have iolat enabled is + * tracked in ->enable_cnt, and ->enable is flipped on/off accordingly + * from ->enable_work with the request_queue frozen. For details, See + * blkiolatency_enable_work_fn(). 
+ */ + bool enabled; + atomic_t enable_cnt; + struct work_struct enable_work; }; static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) @@ -95,11 +105,6 @@ static inline struct blk_iolatency *BLKIOLATENCY(struct rq_qos *rqos) return container_of(rqos, struct blk_iolatency, rqos); } -static inline bool blk_iolatency_enabled(struct blk_iolatency *blkiolat) -{ - return atomic_read(&blkiolat->enabled) > 0; -} - struct child_latency_info { spinlock_t lock; @@ -464,7 +469,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio) struct blkcg_gq *blkg = bio->bi_blkg; bool issue_as_root = bio_issue_as_root_blkg(bio); - if (!blk_iolatency_enabled(blkiolat)) + if (!blkiolat->enabled) return; while (blkg && blkg->parent) { @@ -594,7 +599,6 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) u64 window_start; u64 now; bool issue_as_root = bio_issue_as_root_blkg(bio); - bool enabled = false; int inflight = 0; blkg = bio->bi_blkg; @@ -605,8 +609,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) if (!iolat) return; - enabled = blk_iolatency_enabled(iolat->blkiolat); - if (!enabled) + if (!iolat->blkiolat->enabled) return; now = ktime_to_ns(ktime_get()); @@ -645,6 +648,7 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos) struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); del_timer_sync(&blkiolat->timer); + flush_work(&blkiolat->enable_work); blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency); kfree(blkiolat); } @@ -716,6 +720,44 @@ static void blkiolatency_timer_fn(struct timer_list *t) rcu_read_unlock(); } +/** + * blkiolatency_enable_work_fn - Enable or disable iolatency on the device + * @work: enable_work of the blk_iolatency of interest + * + * iolatency needs to keep track of the number of in-flight IOs per cgroup. This + * is relatively expensive as it involves walking up the hierarchy twice for + * every IO. Thus, if iolatency is not enabled in any cgroup for the device, we + * want to disable the in-flight tracking. + * + * We have to make sure that the counting is balanced - we don't want to leak + * the in-flight counts by disabling accounting in the completion path while IOs + * are in flight. This is achieved by ensuring that no IO is in flight by + * freezing the queue while flipping ->enabled. As this requires a sleepable + * context, ->enabled flipping is punted to this work function. + */ +static void blkiolatency_enable_work_fn(struct work_struct *work) +{ + struct blk_iolatency *blkiolat = container_of(work, struct blk_iolatency, + enable_work); + bool enabled; + + /* + * There can only be one instance of this function running for @blkiolat + * and it's guaranteed to be executed at least once after the latest + * ->enabled_cnt modification. Acting on the latest ->enable_cnt is + * sufficient. + * + * Also, we know @blkiolat is safe to access as ->enable_work is flushed + * in blkcg_iolatency_exit(). 
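The mechanism described in the comment above splits into two halves: config writes only adjust an atomic count and kick a work item, and the work item is the only place the hot-path bool is flipped, doing so with the queue frozen so no bio is midway through the inflight accounting. A minimal sketch of that shape, reusing the field names from the patch; the two function names here are invented for illustration only:

static void example_iolat_count(struct blk_iolatency *blkiolat, bool enable)
{
	/* Config path: only the 0<->1 transitions need the work item. */
	if (enable) {
		if (atomic_inc_return(&blkiolat->enable_cnt) == 1)
			schedule_work(&blkiolat->enable_work);
	} else {
		if (atomic_dec_return(&blkiolat->enable_cnt) == 0)
			schedule_work(&blkiolat->enable_work);
	}
}

static void example_iolat_apply(struct work_struct *work)
{
	struct blk_iolatency *blkiolat =
		container_of(work, struct blk_iolatency, enable_work);
	bool enabled = atomic_read(&blkiolat->enable_cnt) > 0;

	/* Flip the hot-path flag only while nothing is in flight. */
	if (enabled != blkiolat->enabled) {
		blk_mq_freeze_queue(blkiolat->rqos.q);
		blkiolat->enabled = enabled;
		blk_mq_unfreeze_queue(blkiolat->rqos.q);
	}
}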
+ */ + enabled = atomic_read(&blkiolat->enable_cnt); + if (enabled != blkiolat->enabled) { + blk_mq_freeze_queue(blkiolat->rqos.q); + blkiolat->enabled = enabled; + blk_mq_unfreeze_queue(blkiolat->rqos.q); + } +} + int blk_iolatency_init(struct request_queue *q) { struct blk_iolatency *blkiolat; @@ -741,17 +783,15 @@ int blk_iolatency_init(struct request_queue *q) } timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); + INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); return 0; } -/* - * return 1 for enabling iolatency, return -1 for disabling iolatency, otherwise - * return 0. - */ -static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) +static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) { struct iolatency_grp *iolat = blkg_to_lat(blkg); + struct blk_iolatency *blkiolat = iolat->blkiolat; u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; @@ -759,13 +799,15 @@ static int iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, BLKIOLATENCY_MAX_WIN_SIZE); - if (!oldval && val) - return 1; + if (!oldval && val) { + if (atomic_inc_return(&blkiolat->enable_cnt) == 1) + schedule_work(&blkiolat->enable_work); + } if (oldval && !val) { blkcg_clear_delay(blkg); - return -1; + if (atomic_dec_return(&blkiolat->enable_cnt) == 0) + schedule_work(&blkiolat->enable_work); } - return 0; } static void iolatency_clear_scaling(struct blkcg_gq *blkg) @@ -797,7 +839,6 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, u64 lat_val = 0; u64 oldval; int ret; - int enable = 0; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, buf, &ctx); if (ret) @@ -832,41 +873,12 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, blkg = ctx.blkg; oldval = iolat->min_lat_nsec; - enable = iolatency_set_min_lat_nsec(blkg, lat_val); - if (enable) { - if (!blk_get_queue(blkg->q)) { - ret = -ENODEV; - goto out; - } - - blkg_get(blkg); - } - - if (oldval != iolat->min_lat_nsec) { + iolatency_set_min_lat_nsec(blkg, lat_val); + if (oldval != iolat->min_lat_nsec) iolatency_clear_scaling(blkg); - } - ret = 0; out: blkg_conf_finish(&ctx); - if (ret == 0 && enable) { - struct iolatency_grp *tmp = blkg_to_lat(blkg); - struct blk_iolatency *blkiolat = tmp->blkiolat; - - blk_mq_freeze_queue(blkg->q); - - if (enable == 1) - atomic_inc(&blkiolat->enabled); - else if (enable == -1) - atomic_dec(&blkiolat->enabled); - else - WARN_ON_ONCE(1); - - blk_mq_unfreeze_queue(blkg->q); - - blkg_put(blkg); - blk_put_queue(blkg->q); - } return ret ?: nbytes; } @@ -891,7 +903,7 @@ static int iolatency_print_limit(struct seq_file *sf, void *v) return 0; } -static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) +static void iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) { struct latency_stat stat; int cpu; @@ -914,17 +926,16 @@ static bool iolatency_ssd_stat(struct iolatency_grp *iolat, struct seq_file *s) (unsigned long long)stat.ps.missed, (unsigned long long)stat.ps.total, iolat->rq_depth.max_depth); - return true; } -static bool iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) +static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s) { struct iolatency_grp *iolat = pd_to_lat(pd); unsigned long long avg_lat; unsigned long long cur_win; if (!blkcg_debug_stats) - return false; + return; if (iolat->ssd) return iolatency_ssd_stat(iolat, s); @@ -937,7 +948,6 @@ static bool iolatency_pd_stat(struct 
blkg_policy_data *pd, struct seq_file *s) else seq_printf(s, " depth=%u avg_lat=%llu win=%llu", iolat->rq_depth.max_depth, avg_lat, cur_win); - return true; } static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, @@ -1007,14 +1017,8 @@ static void iolatency_pd_offline(struct blkg_policy_data *pd) { struct iolatency_grp *iolat = pd_to_lat(pd); struct blkcg_gq *blkg = lat_to_blkg(iolat); - struct blk_iolatency *blkiolat = iolat->blkiolat; - int ret; - ret = iolatency_set_min_lat_nsec(blkg, 0); - if (ret == 1) - atomic_inc(&blkiolat->enabled); - if (ret == -1) - atomic_dec(&blkiolat->enabled); + iolatency_set_min_lat_nsec(blkg, 0); iolatency_clear_scaling(blkg); } diff --git a/block/blk-mq.c b/block/blk-mq.c index 84d749511f551..de7fc69572718 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -133,7 +133,8 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv, { struct mq_inflight *mi = priv; - if ((!mi->part->bd_partno || rq->part == mi->part) && + if (rq->part && blk_do_io_stat(rq) && + (!mi->part->bd_partno || rq->part == mi->part) && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; @@ -2123,8 +2124,7 @@ static bool blk_mq_has_sqsched(struct request_queue *q) */ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) { - struct blk_mq_hw_ctx *hctx; - + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); /* * If the IO scheduler does not respect hardware queues when * dispatching, we just don't bother with multiple HW queues and @@ -2132,8 +2132,8 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) * just causes lock contention inside the scheduler and pointless cache * bouncing. */ - hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, - raw_smp_processor_id()); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx); + if (!blk_mq_hctx_stopped(hctx)) return hctx; return NULL; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 469c483719bea..5c5f2741a95fa 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2189,13 +2189,14 @@ bool __blk_throtl_bio(struct bio *bio) } out_unlock: - spin_unlock_irq(&q->queue_lock); bio_set_flag(bio, BIO_THROTTLED); #ifdef CONFIG_BLK_DEV_THROTTLING_LOW if (throttled || !td->track_bio_latency) bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY; #endif + spin_unlock_irq(&q->queue_lock); + rcu_read_unlock(); return throttled; } diff --git a/block/genhd.c b/block/genhd.c index b8b6759d670f0..3008ec2136543 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -385,6 +385,8 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode) if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) return -EINVAL; + if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state)) + return -EINVAL; if (disk->open_partitions) return -EBUSY; diff --git a/crypto/cryptd.c b/crypto/cryptd.c index a1bea0f4baa88..668095eca0faf 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -39,6 +39,10 @@ struct cryptd_cpu_queue { }; struct cryptd_queue { + /* + * Protected by disabling BH to allow enqueueing from softinterrupt and + * dequeuing from kworker (cryptd_queue_worker()). 
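Both ends of the per-CPU queue follow the rule stated in the comment above: the enqueue path, which may run in softirq context, and the worker's dequeue each bracket their access with local_bh_disable()/local_bh_enable(), so the two can never interleave on one CPU and the explicit get_cpu()/preempt_disable() pairs become unnecessary. A minimal sketch of the dequeue side, assuming the struct cryptd_cpu_queue layout above; the helper name is invented:

static struct crypto_async_request *
example_dequeue_one(struct cryptd_cpu_queue *cpu_queue)
{
	struct crypto_async_request *req;

	local_bh_disable();	/* keep the softirq enqueue path out */
	req = crypto_dequeue_request(&cpu_queue->queue);
	local_bh_enable();

	return req;
}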
+ */ struct cryptd_cpu_queue __percpu *cpu_queue; }; @@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue) static int cryptd_enqueue_request(struct cryptd_queue *queue, struct crypto_async_request *request) { - int cpu, err; + int err; struct cryptd_cpu_queue *cpu_queue; refcount_t *refcnt; - cpu = get_cpu(); + local_bh_disable(); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); refcnt = crypto_tfm_ctx(request->tfm); if (err == -ENOSPC) - goto out_put_cpu; + goto out; - queue_work_on(cpu, cryptd_wq, &cpu_queue->work); + queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work); if (!refcount_read(refcnt)) - goto out_put_cpu; + goto out; refcount_inc(refcnt); -out_put_cpu: - put_cpu(); +out: + local_bh_enable(); return err; } @@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work) cpu_queue = container_of(work, struct cryptd_cpu_queue, work); /* * Only handle one request at a time to avoid hogging crypto workqueue. - * preempt_disable/enable is used to prevent being preempted by - * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent - * cryptd_enqueue_request() being accessed from software interrupts. */ local_bh_disable(); - preempt_disable(); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); - preempt_enable(); local_bh_enable(); if (!req) diff --git a/crypto/drbg.c b/crypto/drbg.c index 177983b6ae38b..ea79a31014a40 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -115,7 +115,7 @@ * the SHA256 / AES 256 over other ciphers. Thus, the favored * DRBGs are the latest entries in this array. */ -static const struct drbg_core drbg_cores[] = { +const struct drbg_core drbg_cores[] = { #ifdef CONFIG_CRYPTO_DRBG_CTR { .flags = DRBG_CTR | DRBG_STRENGTH128, @@ -192,6 +192,7 @@ static const struct drbg_core drbg_cores[] = { }, #endif /* CONFIG_CRYPTO_DRBG_HMAC */ }; +EXPORT_SYMBOL(drbg_cores); static int drbg_uninstantiate(struct drbg_state *drbg); @@ -207,7 +208,7 @@ static int drbg_uninstantiate(struct drbg_state *drbg); * Return: normalized strength in *bytes* value or 32 as default * to counter programming errors */ -static inline unsigned short drbg_sec_strength(drbg_flag_t flags) +unsigned short drbg_sec_strength(drbg_flag_t flags) { switch (flags & DRBG_STRENGTH_MASK) { case DRBG_STRENGTH128: @@ -220,6 +221,7 @@ static inline unsigned short drbg_sec_strength(drbg_flag_t flags) return 32; } } +EXPORT_SYMBOL(drbg_sec_strength); /* * FIPS 140-2 continuous self test for the noise source @@ -1252,7 +1254,7 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, } /* Free all substructures in a DRBG state without the DRBG state structure */ -static inline void drbg_dealloc_state(struct drbg_state *drbg) +void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; @@ -1273,12 +1275,13 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) drbg->fips_primed = false; } } +EXPORT_SYMBOL(drbg_dealloc_state); /* * Allocate all sub-structures for a DRBG state. * The DRBG state structure must already be allocated. 
*/ -static inline int drbg_alloc_state(struct drbg_state *drbg) +int drbg_alloc_state(struct drbg_state *drbg) { int ret = -ENOMEM; unsigned int sb_size = 0; @@ -1359,6 +1362,7 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) drbg_dealloc_state(drbg); return ret; } +EXPORT_SYMBOL(drbg_alloc_state); /************************************************************************* * DRBG interface functions @@ -1895,8 +1899,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, * * return: flags */ -static inline void drbg_convert_tfm_core(const char *cra_driver_name, - int *coreref, bool *pr) +void drbg_convert_tfm_core(const char *cra_driver_name, int *coreref, bool *pr) { int i = 0; size_t start = 0; @@ -1923,6 +1926,7 @@ static inline void drbg_convert_tfm_core(const char *cra_driver_name, } } } +EXPORT_SYMBOL(drbg_convert_tfm_core); static int drbg_kcapi_init(struct crypto_tfm *tfm) { diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c index b32ffcaad9adf..f3c6b5e15e75b 100644 --- a/crypto/ecrdsa.c +++ b/crypto/ecrdsa.c @@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 1: verify that 0 < r < q, 0 < s < q */ if (vli_is_zero(r, ndigits) || - vli_cmp(r, ctx->curve->n, ndigits) == 1 || + vli_cmp(r, ctx->curve->n, ndigits) >= 0 || vli_is_zero(s, ndigits) || - vli_cmp(s, ctx->curve->n, ndigits) == 1) + vli_cmp(s, ctx->curve->n, ndigits) >= 0) return -EKEYREJECTED; /* Step 2: calculate hash (h) of the message (passed as input) */ /* Step 3: calculate e = h \mod q */ vli_from_le64(e, digest, ndigits); - if (vli_cmp(e, ctx->curve->n, ndigits) == 1) + if (vli_cmp(e, ctx->curve->n, ndigits) >= 0) vli_sub(e, e, ctx->curve->n, ndigits); if (vli_is_zero(e, ndigits)) e[0] = 1; @@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req) /* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */ ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key, ctx->curve); - if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1) + if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0) vli_sub(cc.x, cc.x, ctx->curve->n, ndigits); /* Step 7: if R == r signature is valid */ diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index 2d115bec15aeb..e7dac734d2377 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -42,8 +42,7 @@ #include #include #include - -#include "jitterentropy.h" +#include /*************************************************************************** * Helper function diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 93bff32138238..81b80a4d3d3a0 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -133,7 +133,7 @@ struct rand_data { #define JENT_ENTROPY_SAFETY_FACTOR 64 #include -#include "jitterentropy.h" +#include /*************************************************************************** * Adaptive Proportion Test diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c index 4df337d545b73..cf31abd0ed1bb 100644 --- a/drivers/acpi/arm64/agdi.c +++ b/drivers/acpi/arm64/agdi.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "ACPI: AGDI: " fmt #include +#include #include #include #include diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index bc1454789a065..34576ab0e2e1d 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -100,6 +100,16 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr); (cpc)->cpc_entry.reg.space_id == \ ACPI_ADR_SPACE_PLATFORM_COMM) +/* Check if a CPC register is in SystemMemory */ +#define 
CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_SYSTEM_MEMORY) + +/* Check if a CPC register is in SystemIo */ +#define CPC_IN_SYSTEM_IO(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \ + (cpc)->cpc_entry.reg.space_id == \ + ACPI_ADR_SPACE_SYSTEM_IO) + /* Evaluates to True if reg is a NULL register descriptor */ #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \ (reg)->address == 0 && \ @@ -1447,6 +1457,9 @@ EXPORT_SYMBOL_GPL(cppc_set_perf); * transition latency for performance change requests. The closest we have * is the timing information from the PCCT tables which provides the info * on the number and frequency of PCC commands the platform can handle. + * + * If desired_reg is in the SystemMemory or SystemIo ACPI address space, + * then assume there is no latency. */ unsigned int cppc_get_transition_latency(int cpu_num) { @@ -1472,7 +1485,9 @@ unsigned int cppc_get_transition_latency(int cpu_num) return CPUFREQ_ETERNAL; desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF]; - if (!CPC_IN_PCC(desired_reg)) + if (CPC_IN_SYSTEM_MEMORY(desired_reg) || CPC_IN_SYSTEM_IO(desired_reg)) + return 0; + else if (!CPC_IN_PCC(desired_reg)) return CPUFREQ_ETERNAL; if (pcc_ss_id < 0) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 12bbfe8336095..2b5d53c2a0a4e 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -433,6 +433,16 @@ void acpi_init_properties(struct acpi_device *adev) acpi_extract_apple_properties(adev); } +static void acpi_free_device_properties(struct list_head *list) +{ + struct acpi_device_properties *props, *tmp; + + list_for_each_entry_safe(props, tmp, list, list) { + list_del(&props->list); + kfree(props); + } +} + static void acpi_destroy_nondev_subnodes(struct list_head *list) { struct acpi_data_node *dn, *next; @@ -445,22 +455,18 @@ static void acpi_destroy_nondev_subnodes(struct list_head *list) wait_for_completion(&dn->kobj_done); list_del(&dn->sibling); ACPI_FREE((void *)dn->data.pointer); + acpi_free_device_properties(&dn->data.properties); kfree(dn); } } void acpi_free_properties(struct acpi_device *adev) { - struct acpi_device_properties *props, *tmp; - acpi_destroy_nondev_subnodes(&adev->data.subnodes); ACPI_FREE((void *)adev->data.pointer); adev->data.of_compatible = NULL; adev->data.pointer = NULL; - list_for_each_entry_safe(props, tmp, &adev->data.properties, list) { - list_del(&props->list); - kfree(props); - } + acpi_free_device_properties(&adev->data.properties); } /** diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index c992e57b2c790..3147702710afe 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -373,6 +373,18 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "20GGA00L00"), }, }, + /* + * ASUS B1400CEAE hangs on resume from suspend (see + * https://bugzilla.kernel.org/show_bug.cgi?id=215742). 
+ */ + { + .callback = init_default_s3, + .ident = "ASUS B1400CEAE", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ASUS EXPERTBOOK B1400CEAE"), + }, + }, {}, }; diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index a4b638bea6f16..cc2fe0618178e 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -415,19 +415,30 @@ static ssize_t acpi_data_show(struct file *filp, struct kobject *kobj, loff_t offset, size_t count) { struct acpi_data_attr *data_attr; - void *base; - ssize_t rc; + void __iomem *base; + ssize_t size; data_attr = container_of(bin_attr, struct acpi_data_attr, attr); + size = data_attr->attr.size; + + if (offset < 0) + return -EINVAL; + + if (offset >= size) + return 0; - base = acpi_os_map_memory(data_attr->addr, data_attr->attr.size); + if (count > size - offset) + count = size - offset; + + base = acpi_os_map_iomem(data_attr->addr, size); if (!base) return -ENOMEM; - rc = memory_read_from_buffer(buf, count, &offset, base, - data_attr->attr.size); - acpi_os_unmap_memory(base, data_attr->attr.size); - return rc; + memcpy_fromio(buf, base + offset, count); + + acpi_os_unmap_iomem(base, size); + + return count; } static int acpi_bert_data_init(void *th, struct acpi_data_attr *data_attr) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ca64837641be2..3d57fa84e2be8 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2003,16 +2003,16 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, return err_mask; } -static bool ata_log_supported(struct ata_device *dev, u8 log) +static int ata_log_supported(struct ata_device *dev, u8 log) { struct ata_port *ap = dev->link->ap; if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR) - return false; + return 0; if (ata_read_log_page(dev, ATA_LOG_DIRECTORY, 0, ap->sector_buf, 1)) - return false; - return get_unaligned_le16(&ap->sector_buf[log * 2]) ? true : false; + return 0; + return get_unaligned_le16(&ap->sector_buf[log * 2]); } static bool ata_identify_page_supported(struct ata_device *dev, u8 page) @@ -2448,15 +2448,20 @@ static void ata_dev_config_cpr(struct ata_device *dev) struct ata_cpr_log *cpr_log = NULL; u8 *desc, *buf = NULL; - if (ata_id_major_version(dev->id) < 11 || - !ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES)) + if (ata_id_major_version(dev->id) < 11) + goto out; + + buf_len = ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES); + if (buf_len == 0) goto out; /* * Read the concurrent positioning ranges log (0x47). We can have at - * most 255 32B range descriptors plus a 64B header. + * most 255 32B range descriptors plus a 64B header. This log varies in + * size, so use the size reported in the GPL directory. Reading beyond + * the supported length will result in an error. 
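A small sketch of the sizing rule described above, with an invented helper name: the GPL log directory reports each log's length as a count of 512-byte pages, so the buffer for log 0x47 is that count shifted by 9 rather than the old fixed worst case of 64 + 255 * 32 bytes rounded up to a sector.

static unsigned int example_cpr_buf_len(struct ata_device *dev)
{
	unsigned int pages;

	pages = ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES);
	if (!pages)
		return 0;	/* log not present, nothing to read */

	return pages << 9;	/* the directory counts 512-byte log pages */
}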
*/ - buf_len = (64 + 255 * 32 + 511) & ~511; + buf_len <<= 9; buf = kzalloc(buf_len, GFP_KERNEL); if (!buf) goto out; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 06c9d90238d9e..0ea9c3115529e 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2101,7 +2101,7 @@ static unsigned int ata_scsiop_inq_b9(struct ata_scsi_args *args, u8 *rbuf) /* SCSI Concurrent Positioning Ranges VPD page: SBC-5 rev 1 or later */ rbuf[1] = 0xb9; - put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[3]); + put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[2]); for (i = 0; i < cpr_log->nr_cpr; i++, desc += 32) { desc[0] = cpr_log->cpr[i].num; diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index ca129854a88c7..c380278874990 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -196,7 +196,7 @@ static struct { { XFER_PIO_0, "XFER_PIO_0" }, { XFER_PIO_SLOW, "XFER_PIO_SLOW" } }; -ata_bitfield_name_match(xfer,ata_xfer_names) +ata_bitfield_name_search(xfer, ata_xfer_names) /* * ATA Port attributes diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index 6b5ed3046b44d..35608a0cf552e 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -856,12 +856,14 @@ static int octeon_cf_probe(struct platform_device *pdev) int i; res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0); if (!res_dma) { + put_device(&dma_dev->dev); of_node_put(dma_node); return -EINVAL; } cf_port->dma_base = (u64)devm_ioremap(&pdev->dev, res_dma->start, resource_size(res_dma)); if (!cf_port->dma_base) { + put_device(&dma_dev->dev); of_node_put(dma_node); return -EINVAL; } @@ -871,6 +873,7 @@ static int octeon_cf_probe(struct platform_device *pdev) irq = i; irq_handler = octeon_cf_interrupt; } + put_device(&dma_dev->dev); } of_node_put(dma_node); } diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 97936ec49bde0..7ca47e5b3c1f4 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -617,7 +617,7 @@ int bus_add_driver(struct device_driver *drv) if (drv->bus->p->drivers_autoprobe) { error = driver_attach(drv); if (error) - goto out_unregister; + goto out_del_list; } module_add_driver(drv->owner, drv); @@ -644,6 +644,8 @@ int bus_add_driver(struct device_driver *drv) return 0; +out_del_list: + klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2ef23fce0860c..a97776ea9d990 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -564,6 +564,12 @@ ssize_t __weak cpu_show_srbds(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -573,6 +579,7 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); +static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -584,6 +591,7 @@ static struct 
attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_tsx_async_abort.attr, &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 3fc3b5940bb31..d6980f33afc44 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -257,7 +257,6 @@ DEFINE_SHOW_ATTRIBUTE(deferred_devs); int driver_deferred_probe_timeout; EXPORT_SYMBOL_GPL(driver_deferred_probe_timeout); -static DECLARE_WAIT_QUEUE_HEAD(probe_timeout_waitqueue); static int __init deferred_probe_timeout_setup(char *str) { @@ -312,7 +311,6 @@ static void deferred_probe_timeout_work_func(struct work_struct *work) list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe) dev_info(p->device, "deferred probe pending\n"); mutex_unlock(&deferred_probe_mutex); - wake_up_all(&probe_timeout_waitqueue); } static DECLARE_DELAYED_WORK(deferred_probe_timeout_work, deferred_probe_timeout_work_func); @@ -716,9 +714,6 @@ int driver_probe_done(void) */ void wait_for_device_probe(void) { - /* wait for probe timeout */ - wait_event(probe_timeout_waitqueue, !driver_deferred_probe_timeout); - /* wait for the deferred probe workqueue to finish */ flush_work(&deferred_probe_work); @@ -941,6 +936,7 @@ static void __device_attach_async_helper(void *_dev, async_cookie_t cookie) static int __device_attach(struct device *dev, bool allow_async) { int ret = 0; + bool async = false; device_lock(dev); if (dev->p->dead) { @@ -979,7 +975,7 @@ static int __device_attach(struct device *dev, bool allow_async) */ dev_dbg(dev, "scheduling asynchronous probe\n"); get_device(dev); - async_schedule_dev(__device_attach_async_helper, dev); + async = true; } else { pm_request_idle(dev); } @@ -989,6 +985,8 @@ static int __device_attach(struct device *dev, bool allow_async) } out_unlock: device_unlock(dev); + if (async) + async_schedule_dev(__device_attach_async_helper, dev); return ret; } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 7222ff9b5e05c..084d67fd55cc8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -636,10 +636,9 @@ static int __add_memory_block(struct memory_block *memory) } ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory, GFP_KERNEL)); - if (ret) { - put_device(&memory->dev); + if (ret) device_unregister(&memory->dev); - } + return ret; } diff --git a/drivers/base/node.c b/drivers/base/node.c index ec8bb24a5a227..0ac6376ef7a10 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -682,6 +682,7 @@ static int register_node(struct node *node, int num) */ void unregister_node(struct node *node) { + compaction_unregister_node(node); hugetlb_unregister_node(node); /* no-op, if memoryless node */ node_remove_accesses(node); node_remove_caches(node); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 1ee878d126fdf..f0e4b0ea93e8c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1997,6 +1997,7 @@ int pm_genpd_init(struct generic_pm_domain *genpd, genpd->device_count = 0; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = true; + genpd->next_wakeup = KTIME_MAX; genpd->provider = NULL; genpd->has_provider = false; genpd->accounting_time = ktime_get(); diff --git a/drivers/base/property.c b/drivers/base/property.c index c0e94cce9c294..2f6843c9612bd 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -47,12 +47,14 @@ bool fwnode_property_present(const struct fwnode_handle *fwnode, { bool ret; + if (IS_ERR_OR_NULL(fwnode)) + return false; + 
ret = fwnode_call_bool_op(fwnode, property_present, propname); - if (ret == false && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_bool_op(fwnode->secondary, property_present, - propname); - return ret; + if (ret) + return ret; + + return fwnode_call_bool_op(fwnode->secondary, property_present, propname); } EXPORT_SYMBOL_GPL(fwnode_property_present); @@ -232,15 +234,16 @@ static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode, { int ret; + if (IS_ERR_OR_NULL(fwnode)) + return -EINVAL; + ret = fwnode_call_int_op(fwnode, property_read_int_array, propname, elem_size, val, nval); - if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_int_op( - fwnode->secondary, property_read_int_array, propname, - elem_size, val, nval); + if (ret != -EINVAL) + return ret; - return ret; + return fwnode_call_int_op(fwnode->secondary, property_read_int_array, propname, + elem_size, val, nval); } /** @@ -371,14 +374,16 @@ int fwnode_property_read_string_array(const struct fwnode_handle *fwnode, { int ret; + if (IS_ERR_OR_NULL(fwnode)) + return -EINVAL; + ret = fwnode_call_int_op(fwnode, property_read_string_array, propname, val, nval); - if (ret == -EINVAL && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_int_op(fwnode->secondary, - property_read_string_array, propname, - val, nval); - return ret; + if (ret != -EINVAL) + return ret; + + return fwnode_call_int_op(fwnode->secondary, property_read_string_array, propname, + val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); @@ -480,15 +485,19 @@ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, { int ret; + if (IS_ERR_OR_NULL(fwnode)) + return -ENOENT; + ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, nargs, index, args); + if (ret == 0) + return ret; - if (ret < 0 && !IS_ERR_OR_NULL(fwnode) && - !IS_ERR_OR_NULL(fwnode->secondary)) - ret = fwnode_call_int_op(fwnode->secondary, get_reference_args, - prop, nargs_prop, nargs, index, args); + if (IS_ERR_OR_NULL(fwnode->secondary)) + return ret; - return ret; + return fwnode_call_int_op(fwnode->secondary, get_reference_args, prop, nargs_prop, + nargs, index, args); } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); @@ -635,12 +644,13 @@ EXPORT_SYMBOL_GPL(fwnode_count_parents); struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode, unsigned int depth) { - unsigned int i; - fwnode_handle_get(fwnode); - for (i = 0; i < depth && fwnode; i++) + do { + if (depth-- == 0) + break; fwnode = fwnode_get_next_parent(fwnode); + } while (fwnode); return fwnode; } @@ -659,17 +669,17 @@ EXPORT_SYMBOL_GPL(fwnode_get_nth_parent); bool fwnode_is_ancestor_of(struct fwnode_handle *test_ancestor, struct fwnode_handle *test_child) { - if (!test_ancestor) + if (IS_ERR_OR_NULL(test_ancestor)) return false; fwnode_handle_get(test_child); - while (test_child) { + do { if (test_child == test_ancestor) { fwnode_handle_put(test_child); return true; } test_child = fwnode_get_next_parent(test_child); - } + } while (test_child); return false; } @@ -698,7 +708,7 @@ fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode, { struct fwnode_handle *next_child = child; - if (!fwnode) + if (IS_ERR_OR_NULL(fwnode)) return NULL; do { @@ -722,16 +732,16 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, const struct fwnode_handle *fwnode = dev_fwnode(dev); struct fwnode_handle *next; + if 
(IS_ERR_OR_NULL(fwnode)) + return NULL; + /* Try to find a child in primary fwnode */ next = fwnode_get_next_child_node(fwnode, child); if (next) return next; /* When no more children in primary, continue with secondary */ - if (fwnode && !IS_ERR_OR_NULL(fwnode->secondary)) - next = fwnode_get_next_child_node(fwnode->secondary, child); - - return next; + return fwnode_get_next_child_node(fwnode->secondary, child); } EXPORT_SYMBOL_GPL(device_get_next_child_node); @@ -798,6 +808,9 @@ EXPORT_SYMBOL_GPL(fwnode_handle_put); */ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) { + if (IS_ERR_OR_NULL(fwnode)) + return false; + if (!fwnode_has_op(fwnode, device_is_available)) return true; @@ -988,14 +1001,14 @@ fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, parent = fwnode_graph_get_port_parent(prev); else parent = fwnode; + if (IS_ERR_OR_NULL(parent)) + return NULL; ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev); + if (ep) + return ep; - if (IS_ERR_OR_NULL(ep) && - !IS_ERR_OR_NULL(parent) && !IS_ERR_OR_NULL(parent->secondary)) - ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL); - - return ep; + return fwnode_graph_get_next_endpoint(parent->secondary, NULL); } EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 4b0b25cc916ee..57b23e49ee91b 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -903,31 +903,6 @@ void drbd_gen_and_send_sync_uuid(struct drbd_peer_device *peer_device) } } -/* communicated if (agreed_features & DRBD_FF_WSAME) */ -static void -assign_p_sizes_qlim(struct drbd_device *device, struct p_sizes *p, - struct request_queue *q) -{ - if (q) { - p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); - p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q)); - p->qlim->alignment_offset = cpu_to_be32(queue_alignment_offset(q)); - p->qlim->io_min = cpu_to_be32(queue_io_min(q)); - p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); - p->qlim->discard_enabled = blk_queue_discard(q); - p->qlim->write_same_capable = 0; - } else { - q = device->rq_queue; - p->qlim->physical_block_size = cpu_to_be32(queue_physical_block_size(q)); - p->qlim->logical_block_size = cpu_to_be32(queue_logical_block_size(q)); - p->qlim->alignment_offset = 0; - p->qlim->io_min = cpu_to_be32(queue_io_min(q)); - p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); - p->qlim->discard_enabled = 0; - p->qlim->write_same_capable = 0; - } -} - int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enum dds_flags flags) { struct drbd_device *device = peer_device->device; @@ -949,7 +924,9 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu memset(p, 0, packet_size); if (get_ldev_if_state(device, D_NEGOTIATING)) { - struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); + struct block_device *bdev = device->ldev->backing_bdev; + struct request_queue *q = bdev_get_queue(bdev); + d_size = drbd_get_max_capacity(device->ldev); rcu_read_lock(); u_size = rcu_dereference(device->ldev->disk_conf)->disk_size; @@ -957,14 +934,32 @@ int drbd_send_sizes(struct drbd_peer_device *peer_device, int trigger_reply, enu q_order_type = drbd_queue_order_type(device); max_bio_size = queue_max_hw_sectors(q) << 9; max_bio_size = min(max_bio_size, DRBD_MAX_BIO_SIZE); - assign_p_sizes_qlim(device, p, q); + p->qlim->physical_block_size = + cpu_to_be32(bdev_physical_block_size(bdev)); 
+ p->qlim->logical_block_size = + cpu_to_be32(bdev_logical_block_size(bdev)); + p->qlim->alignment_offset = + cpu_to_be32(bdev_alignment_offset(bdev)); + p->qlim->io_min = cpu_to_be32(bdev_io_min(bdev)); + p->qlim->io_opt = cpu_to_be32(bdev_io_opt(bdev)); + p->qlim->discard_enabled = blk_queue_discard(q); put_ldev(device); } else { + struct request_queue *q = device->rq_queue; + + p->qlim->physical_block_size = + cpu_to_be32(queue_physical_block_size(q)); + p->qlim->logical_block_size = + cpu_to_be32(queue_logical_block_size(q)); + p->qlim->alignment_offset = 0; + p->qlim->io_min = cpu_to_be32(queue_io_min(q)); + p->qlim->io_opt = cpu_to_be32(queue_io_opt(q)); + p->qlim->discard_enabled = 0; + d_size = 0; u_size = 0; q_order_type = QUEUE_ORDERED_NONE; max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ - assign_p_sizes_qlim(device, p, NULL); } if (peer_device->connection->agreed_pro_version <= 94) @@ -3586,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd) * when we want to support more than * one PRO_VERSION */ static const char *cmdnames[] = { + [P_DATA] = "Data", - [P_WSAME] = "WriteSame", - [P_TRIM] = "Trim", [P_DATA_REPLY] = "DataReply", [P_RS_DATA_REPLY] = "RSDataReply", [P_BARRIER] = "Barrier", @@ -3599,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd) [P_DATA_REQUEST] = "DataRequest", [P_RS_DATA_REQUEST] = "RSDataRequest", [P_SYNC_PARAM] = "SyncParam", - [P_SYNC_PARAM89] = "SyncParam89", [P_PROTOCOL] = "ReportProtocol", [P_UUIDS] = "ReportUUIDs", [P_SIZES] = "ReportSizes", @@ -3607,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd) [P_SYNC_UUID] = "ReportSyncUUID", [P_AUTH_CHALLENGE] = "AuthChallenge", [P_AUTH_RESPONSE] = "AuthResponse", + [P_STATE_CHG_REQ] = "StateChgRequest", [P_PING] = "Ping", [P_PING_ACK] = "PingAck", [P_RECV_ACK] = "RecvAck", @@ -3617,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd) [P_NEG_DREPLY] = "NegDReply", [P_NEG_RS_DREPLY] = "NegRSDReply", [P_BARRIER_ACK] = "BarrierAck", - [P_STATE_CHG_REQ] = "StateChgRequest", [P_STATE_CHG_REPLY] = "StateChgReply", [P_OV_REQUEST] = "OVRequest", [P_OV_REPLY] = "OVReply", [P_OV_RESULT] = "OVResult", [P_CSUM_RS_REQUEST] = "CsumRSRequest", [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_SYNC_PARAM89] = "SyncParam89", [P_COMPRESSED_BITMAP] = "CBitmap", [P_DELAY_PROBE] = "DelayProbe", [P_OUT_OF_SYNC] = "OutOfSync", - [P_RETRY_WRITE] = "RetryWrite", [P_RS_CANCEL] = "RSCancel", [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", [P_PROTOCOL_UPDATE] = "protocol_update", + [P_TRIM] = "Trim", [P_RS_THIN_REQ] = "rs_thin_req", [P_RS_DEALLOCATED] = "rs_deallocated", + [P_WSAME] = "WriteSame", + [P_ZEROES] = "Zeroes", /* enum drbd_packet, but not commands - obsoleted flags: * P_MAY_IGNORE diff --git a/drivers/block/loop.c b/drivers/block/loop.c index a58595f5ee2c8..4e1dce3beab0c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1066,7 +1066,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, lo->lo_flags |= LO_FLAGS_PARTSCAN; partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; if (partscan) - lo->lo_disk->flags &= ~GENHD_FL_NO_PART; + clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); loop_global_unlock(lo, is_loop); if (partscan) @@ -1185,7 +1185,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release) */ lo->lo_flags = 0; if (!part_shift) - lo->lo_disk->flags |= GENHD_FL_NO_PART; + set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); mutex_lock(&lo->lo_mutex); lo->lo_state = Lo_unbound; mutex_unlock(&lo->lo_mutex); @@ 
-1295,7 +1295,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_disk->flags &= ~GENHD_FL_NO_PART; + clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); partscan = true; } out_unlock: @@ -1768,6 +1768,14 @@ static void lo_release(struct gendisk *disk, fmode_t mode) mutex_unlock(&lo->lo_mutex); } +static void lo_free_disk(struct gendisk *disk) +{ + struct loop_device *lo = disk->private_data; + + mutex_destroy(&lo->lo_mutex); + kfree(lo); +} + static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, @@ -1776,6 +1784,7 @@ static const struct block_device_operations lo_fops = { #ifdef CONFIG_COMPAT .compat_ioctl = lo_compat_ioctl, #endif + .free_disk = lo_free_disk, }; /* @@ -2045,7 +2054,7 @@ static int loop_add(int i) * userspace tools. Parameters like this in general should be avoided. */ if (!part_shift) - disk->flags |= GENHD_FL_NO_PART; + set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); atomic_set(&lo->lo_refcnt, 0); mutex_init(&lo->lo_mutex); lo->lo_number = i; @@ -2090,15 +2099,14 @@ static void loop_remove(struct loop_device *lo) { /* Make this loop device unreachable from pathname. */ del_gendisk(lo->lo_disk); - blk_cleanup_disk(lo->lo_disk); + blk_cleanup_queue(lo->lo_disk->queue); blk_mq_free_tag_set(&lo->tag_set); mutex_lock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); mutex_unlock(&loop_ctl_mutex); - /* There is no route which can find this loop device. */ - mutex_destroy(&lo->lo_mutex); - kfree(lo); + + put_disk(lo->lo_disk); } static void loop_probe(dev_t dev) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5a1f98494dddf..ee5adca0ba7b9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -404,13 +404,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, if (!mutex_trylock(&cmd->lock)) return BLK_EH_RESET_TIMER; - if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { mutex_unlock(&cmd->lock); return BLK_EH_DONE; } if (!refcount_inc_not_zero(&nbd->config_refs)) { cmd->status = BLK_STS_TIMEOUT; + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); goto done; } @@ -479,6 +480,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n"); set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags); cmd->status = BLK_STS_IOERR; + __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags); mutex_unlock(&cmd->lock); sock_shutdown(nbd); nbd_config_put(nbd); @@ -746,7 +748,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index, cmd = blk_mq_rq_to_pdu(req); mutex_lock(&cmd->lock); - if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { + if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) { dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)", tag, cmd->status, cmd->flags); ret = -ENOENT; @@ -855,8 +857,16 @@ static void recv_work(struct work_struct *work) } rq = blk_mq_rq_from_pdu(cmd); - if (likely(!blk_should_fake_timeout(rq->q))) - blk_mq_complete_request(rq); + if (likely(!blk_should_fake_timeout(rq->q))) { + bool complete; + + mutex_lock(&cmd->lock); + complete = __test_and_clear_bit(NBD_CMD_INFLIGHT, + &cmd->flags); + mutex_unlock(&cmd->lock); + if (complete) + blk_mq_complete_request(rq); + } percpu_ref_put(&q->q_usage_counter); } @@ -947,11 +957,15 @@ static int wait_for_reconnect(struct 
nbd_device *nbd) struct nbd_config *config = nbd->config; if (!config->dead_conn_timeout) return 0; - if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags)) + + if (!wait_event_timeout(config->conn_wait, + test_bit(NBD_RT_DISCONNECTED, + &config->runtime_flags) || + atomic_read(&config->live_connections) > 0, + config->dead_conn_timeout)) return 0; - return wait_event_timeout(config->conn_wait, - atomic_read(&config->live_connections) > 0, - config->dead_conn_timeout) > 0; + + return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags); } static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) @@ -1420,7 +1434,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b static void nbd_clear_sock_ioctl(struct nbd_device *nbd, struct block_device *bdev) { - sock_shutdown(nbd); + nbd_clear_sock(nbd); __invalidate_device(bdev, true); nbd_bdev_reset(bdev); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, @@ -1519,15 +1533,20 @@ static struct nbd_config *nbd_alloc_config(void) { struct nbd_config *config; + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(-ENODEV); + config = kzalloc(sizeof(struct nbd_config), GFP_NOFS); - if (!config) - return NULL; + if (!config) { + module_put(THIS_MODULE); + return ERR_PTR(-ENOMEM); + } + atomic_set(&config->recv_threads, 0); init_waitqueue_head(&config->recv_wq); init_waitqueue_head(&config->conn_wait); config->blksize_bits = NBD_DEF_BLKSIZE_BITS; atomic_set(&config->live_connections, 0); - try_module_get(THIS_MODULE); return config; } @@ -1554,12 +1573,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode) mutex_unlock(&nbd->config_lock); goto out; } - config = nbd->config = nbd_alloc_config(); - if (!config) { - ret = -ENOMEM; + config = nbd_alloc_config(); + if (IS_ERR(config)) { + ret = PTR_ERR(config); mutex_unlock(&nbd->config_lock); goto out; } + nbd->config = config; refcount_set(&nbd->config_refs, 1); refcount_inc(&nbd->refs); mutex_unlock(&nbd->config_lock); @@ -1800,17 +1820,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) refcount_set(&nbd->refs, 0); INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; - - /* Too big first_minor can cause duplicate creation of - * sysfs files/links, since index << part_shift might overflow, or - * MKDEV() expect that the max bits of first_minor is 20. - */ disk->first_minor = index << part_shift; - if (disk->first_minor < index || disk->first_minor > MINORMASK) { - err = -EINVAL; - goto out_free_work; - } - disk->minors = 1 << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; @@ -1915,8 +1925,19 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (info->attrs[NBD_ATTR_INDEX]) + if (info->attrs[NBD_ATTR_INDEX]) { index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + /* + * Too big first_minor can cause duplicate creation of + * sysfs files/links, since index << part_shift might overflow, or + * MKDEV() expect that the max bits of first_minor is 20. 
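The check added below is easier to see with concrete numbers. A stand-alone sketch, with MINORBITS/MINORMASK spelled out as the kernel headers define them and an invented helper name: with part_shift = 5, an index of 1 << 27 would make index << part_shift overflow a 32-bit int before any later comparison could reject it, so the bound has to be applied to index itself, against MINORMASK >> part_shift.

#include <stdbool.h>

#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)

static bool example_nbd_index_ok(int index, int part_shift)
{
	/* Reject before shifting: index << part_shift may not be representable. */
	return index >= 0 && index <= (int)(MINORMASK >> part_shift);
}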
+ */ + if (index < 0 || index > MINORMASK >> part_shift) { + printk(KERN_ERR "nbd: illegal input index %d\n", index); + return -EINVAL; + } + } if (!info->attrs[NBD_ATTR_SOCKETS]) { printk(KERN_ERR "nbd: must specify at least one socket\n"); return -EINVAL; @@ -1966,13 +1987,14 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return -EINVAL; } - config = nbd->config = nbd_alloc_config(); - if (!nbd->config) { + config = nbd_alloc_config(); + if (IS_ERR(config)) { mutex_unlock(&nbd->config_lock); nbd_put(nbd); printk(KERN_ERR "nbd: couldn't allocate config\n"); - return -ENOMEM; + return PTR_ERR(config); } + nbd->config = config; refcount_set(&nbd->config_refs, 1); set_bit(NBD_RT_BOUND, &config->runtime_flags); @@ -2082,6 +2104,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); sock_shutdown(nbd); + wake_up(&nbd->config->conn_wait); /* * Make sure recv thread has finished, we can safely call nbd_clear_que() * to cancel the inflight I/Os. @@ -2529,6 +2552,12 @@ static void __exit nbd_cleanup(void) struct nbd_device *nbd; LIST_HEAD(del_list); + /* + * Unregister netlink interface prior to waiting + * for the completion of netlink commands. + */ + genl_unregister_family(&nbd_genl_family); + nbd_dbg_close(); mutex_lock(&nbd_index_mutex); @@ -2538,6 +2567,9 @@ static void __exit nbd_cleanup(void) while (!list_empty(&del_list)) { nbd = list_first_entry(&del_list, struct nbd_device, list); list_del_init(&nbd->list); + if (refcount_read(&nbd->config_refs)) + printk(KERN_ERR "nbd: possibly leaking nbd_config (ref %d)\n", + refcount_read(&nbd->config_refs)); if (refcount_read(&nbd->refs) != 1) printk(KERN_ERR "nbd: possibly leaking a device\n"); nbd_put(nbd); @@ -2547,7 +2579,6 @@ static void __exit nbd_cleanup(void) destroy_workqueue(nbd_del_wq); idr_destroy(&nbd_index_idr); - genl_unregister_family(&nbd_genl_family); unregister_blkdev(NBD_MAJOR, "nbd"); } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a8bcf3f664af1..10bba1e00f2b4 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev) blk_queue_io_opt(q, blk_size * opt_io_size); if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { - q->limits.discard_granularity = blk_size; - virtio_cread(vdev, struct virtio_blk_config, discard_sector_alignment, &v); - q->limits.discard_alignment = v ? 
v << SECTOR_SHIFT : 0; + if (v) + q->limits.discard_granularity = v << SECTOR_SHIFT; + else + q->limits.discard_granularity = blk_size; virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors, &v); diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index f3dc5881fff70..d6700efcfe8cd 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -379,6 +379,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) { struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); struct hci_event_hdr *hdr = (void *)skb->data; + u8 evt = hdr->evt; int err; /* When someone waits for the WMT event, the skb is being cloned @@ -396,7 +397,7 @@ static int btmtksdio_recv_event(struct hci_dev *hdev, struct sk_buff *skb) if (err < 0) goto err_free_skb; - if (hdr->evt == HCI_EV_WMT) { + if (evt == HCI_EV_WMT) { if (test_and_clear_bit(BTMTKSDIO_TX_WAIT_VND_EVT, &bdev->tx_state)) { /* Barrier to sync with other CPUs */ @@ -863,6 +864,14 @@ static int mt79xx_setup(struct hci_dev *hdev, const char *fwname) return err; } + err = btmtksdio_fw_pmctrl(bdev); + if (err < 0) + return err; + + err = btmtksdio_drv_pmctrl(bdev); + if (err < 0) + return err; + /* Enable Bluetooth protocol */ wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 0; @@ -961,7 +970,7 @@ static int btmtksdio_get_codec_config_data(struct hci_dev *hdev, } *ven_data = kmalloc(sizeof(__u8), GFP_KERNEL); - if (!ven_data) { + if (!*ven_data) { err = -ENOMEM; goto error; } @@ -1108,14 +1117,6 @@ static int btmtksdio_setup(struct hci_dev *hdev) if (err < 0) return err; - err = btmtksdio_fw_pmctrl(bdev); - if (err < 0) - return err; - - err = btmtksdio_drv_pmctrl(bdev); - if (err < 0) - return err; - /* Enable SCO over I2S/PCM */ err = btmtksdio_sco_setting(hdev); if (err < 0) { @@ -1188,6 +1189,10 @@ static int btmtksdio_shutdown(struct hci_dev *hdev) */ pm_runtime_get_sync(bdev->dev); + /* wmt command only works until the reset is complete */ + if (test_bit(BTMTKSDIO_HW_RESET_ACTIVE, &bdev->tx_state)) + goto ignore_wmt_cmd; + /* Disable the device */ wmt_params.op = BTMTK_WMT_FUNC_CTRL; wmt_params.flag = 0; @@ -1201,6 +1206,7 @@ static int btmtksdio_shutdown(struct hci_dev *hdev) return err; } +ignore_wmt_cmd: pm_runtime_put_noidle(bdev->dev); pm_runtime_disable(bdev->dev); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 50df417207afd..e48c3ad069bb4 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3335,6 +3335,12 @@ static int btusb_setup_qca(struct hci_dev *hdev) msleep(QCA_BT_RESET_WAIT_MS); } + /* Mark HCI_OP_ENHANCED_SETUP_SYNC_CONN as broken as it doesn't seem to + * work with the likes of HSP/HFP mSBC. 
+ */ + set_bit(HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); + return 0; } diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index f6e91fb432a3b..eab34e24d9446 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -696,9 +696,9 @@ static int qca_close(struct hci_uart *hu) skb_queue_purge(&qca->tx_wait_q); skb_queue_purge(&qca->txq); skb_queue_purge(&qca->rx_memdump_q); - del_timer(&qca->tx_idle_timer); - del_timer(&qca->wake_retrans_timer); destroy_workqueue(qca->workqueue); + del_timer_sync(&qca->tx_idle_timer); + del_timer_sync(&qca->wake_retrans_timer); qca->hu = NULL; kfree_skb(qca->rx_skb); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 7a1b1f9e49333..70d00cea9d22a 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -3395,7 +3395,9 @@ static int sysc_remove(struct platform_device *pdev) struct sysc *ddata = platform_get_drvdata(pdev); int error; - cancel_delayed_work_sync(&ddata->idle_work); + /* Device can still be enabled, see deferred idle quirk in probe */ + if (cancel_delayed_work_sync(&ddata->idle_work)) + ti_sysc_idle(&ddata->idle_work.work); error = pm_runtime_resume_and_get(ddata->dev); if (error < 0) { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 55f48375e3fe5..93d52a419470a 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -452,4 +452,6 @@ config RANDOM_TRUST_BOOTLOADER only mixes the entropy pool. This can also be configured at boot with "random.trust_bootloader=on/off". +source "drivers/char/lrng/Kconfig" + endmenu diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 264eb398fdd4f..c2676f5d5c964 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -3,7 +3,8 @@ # Makefile for the kernel character device drivers. 
# -obj-y += mem.o random.o +obj-y += mem.o +obj-$(CONFIG_RANDOM_DEFAULT_IMPL) += random.o obj-$(CONFIG_TTY_PRINTK) += ttyprintk.o obj-y += misc.o obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o @@ -46,3 +47,5 @@ obj-$(CONFIG_PS3_FLASH) += ps3flash.o obj-$(CONFIG_XILLYBUS_CLASS) += xillybus/ obj-$(CONFIG_POWERNV_OP_PANEL) += powernv-op-panel.o obj-$(CONFIG_ADI) += adi.o + +obj-$(CONFIG_LRNG) += lrng/ diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c index 35001c63648bb..a01e9307737c5 100644 --- a/drivers/char/hw_random/cn10k-rng.c +++ b/drivers/char/hw_random/cn10k-rng.c @@ -31,26 +31,23 @@ struct cn10k_rng { #define PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE 0xc2000b0f -static int reset_rng_health_state(struct cn10k_rng *rng) +static unsigned long reset_rng_health_state(struct cn10k_rng *rng) { struct arm_smccc_res res; /* Send SMC service call to reset EBG health state */ arm_smccc_smc(PLAT_OCTEONTX_RESET_RNG_EBG_HEALTH_STATE, 0, 0, 0, 0, 0, 0, 0, &res); - if (res.a0 != 0UL) - return -EIO; - - return 0; + return res.a0; } static int check_rng_health(struct cn10k_rng *rng) { u64 status; - int err; + unsigned long err; /* Skip checking health */ if (!rng->reg_base) - return 0; + return -ENODEV; status = readq(rng->reg_base + RNM_PF_EBG_HEALTH); if (status & BIT_ULL(20)) { @@ -58,7 +55,9 @@ static int check_rng_health(struct cn10k_rng *rng) if (err) { dev_err(&rng->pdev->dev, "HWRNG: Health test failed (status=%llx)\n", status); - dev_err(&rng->pdev->dev, "HWRNG: error during reset\n"); + dev_err(&rng->pdev->dev, "HWRNG: error during reset (error=%lx)\n", + err); + return -EIO; } } return 0; @@ -90,6 +89,7 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data, { struct cn10k_rng *rng = (struct cn10k_rng *)hwrng->priv; unsigned int size; + u8 *pos = data; int err = 0; u64 value; @@ -102,17 +102,20 @@ static int cn10k_rng_read(struct hwrng *hwrng, void *data, while (size >= 8) { cn10k_read_trng(rng, &value); - *((u64 *)data) = (u64)value; + *((u64 *)pos) = value; size -= 8; - data += 8; + pos += 8; } - while (size > 0) { + if (size > 0) { cn10k_read_trng(rng, &value); - *((u8 *)data) = (u8)value; - size--; - data++; + while (size > 0) { + *pos = (u8)value; + value >>= 8; + size--; + pos++; + } } return max - size; diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index e0d77fa048fb6..f06e4f95114f9 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -92,7 +92,7 @@ static int __maybe_unused omap_rom_rng_runtime_resume(struct device *dev) r = ddata->rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT); if (r != 0) { - clk_disable(ddata->clk); + clk_disable_unprepare(ddata->clk); dev_err(dev, "HW init failed: %d\n", r); return -EIO; diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index e856df7e285c7..a6f3a8a2aca6d 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -159,6 +159,8 @@ static int probe_common(struct virtio_device *vdev) goto err_find; } + virtio_device_ready(vdev); + /* we always have a pending entropy request */ request_entropy(vi); diff --git a/drivers/char/ipmi/ipmi_ipmb.c b/drivers/char/ipmi/ipmi_ipmb.c index b81b862532fb0..a8bfe0ade082b 100644 --- a/drivers/char/ipmi/ipmi_ipmb.c +++ b/drivers/char/ipmi/ipmi_ipmb.c @@ -476,6 +476,7 @@ static int ipmi_ipmb_probe(struct i2c_client *client, slave_np = of_parse_phandle(dev->of_node, "slave-dev", 0); if (slave_np) { slave_adap = 
of_get_i2c_adapter_by_node(slave_np); + of_node_put(slave_np); if (!slave_adap) { dev_notice(&client->dev, "Could not find slave adapter\n"); diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index f1827257ef0e0..2610e809c802b 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -11,8 +11,8 @@ * Copyright 2002 MontaVista Software Inc. */ -#define pr_fmt(fmt) "%s" fmt, "IPMI message handler: " -#define dev_fmt pr_fmt +#define pr_fmt(fmt) "IPMI message handler: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include #include diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index bc3a18daf97a6..62e71c46ac5f7 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -94,9 +94,7 @@ static void dummy_recv_free(struct ipmi_recv_msg *msg) { atomic_dec(&dummy_count); } -static struct ipmi_smi_msg halt_smi_msg = { - .done = dummy_smi_free -}; +static struct ipmi_smi_msg halt_smi_msg = INIT_IPMI_SMI_MSG(dummy_smi_free); static struct ipmi_recv_msg halt_recv_msg = { .done = dummy_recv_free }; diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index f199cc1948446..64c73ea9c9151 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -814,6 +814,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_EVENTS: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting events\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -838,6 +846,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, break; case SSIF_GETTING_MESSAGES: + if (!msg) { + /* Should never happen, but just in case. */ + dev_warn(&ssif_info->client->dev, + "No message set while getting messages\n"); + ipmi_ssif_unlock_cond(ssif_info, flags); + break; + } + if ((result < 0) || (len < 3) || (msg->rsp[2] != 0)) { /* Error getting event, probably done. */ msg->done(msg); @@ -861,6 +877,13 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, deliver_recv_msg(ssif_info, msg); } break; + + default: + /* Should never happen, but just in case. 
*/ + dev_warn(&ssif_info->client->dev, + "Invalid state in message done handling: %d\n", + ssif_info->ssif_state); + ipmi_ssif_unlock_cond(ssif_info, flags); } flags = ipmi_ssif_lock_cond(ssif_info, &oflags); diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 0604abdd249a1..4c1e9663ea479 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -354,9 +354,7 @@ static void msg_free_recv(struct ipmi_recv_msg *msg) complete(&msg_wait); } } -static struct ipmi_smi_msg smi_msg = { - .done = msg_free_smi -}; +static struct ipmi_smi_msg smi_msg = INIT_IPMI_SMI_MSG(msg_free_smi); static struct ipmi_recv_msg recv_msg = { .done = msg_free_recv }; @@ -475,9 +473,8 @@ static void panic_recv_free(struct ipmi_recv_msg *msg) atomic_dec(&panic_done_count); } -static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg = { - .done = panic_smi_free -}; +static struct ipmi_smi_msg panic_halt_heartbeat_smi_msg = + INIT_IPMI_SMI_MSG(panic_smi_free); static struct ipmi_recv_msg panic_halt_heartbeat_recv_msg = { .done = panic_recv_free }; @@ -516,9 +513,8 @@ static void panic_halt_ipmi_heartbeat(void) atomic_sub(2, &panic_done_count); } -static struct ipmi_smi_msg panic_halt_smi_msg = { - .done = panic_smi_free -}; +static struct ipmi_smi_msg panic_halt_smi_msg = + INIT_IPMI_SMI_MSG(panic_smi_free); static struct ipmi_recv_msg panic_halt_recv_msg = { .done = panic_recv_free }; diff --git a/drivers/char/lrng/Kconfig b/drivers/char/lrng/Kconfig new file mode 100644 index 0000000000000..bfd6b3ea4f1be --- /dev/null +++ b/drivers/char/lrng/Kconfig @@ -0,0 +1,907 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Linux Random Number Generator configuration +# + +config RANDOM_DEFAULT_IMPL + bool "Kernel RNG Default Implementation" + default y + help + The default random number generator as provided with + drivers/char/random.c is selected with this option. + +config LRNG + bool "Linux Random Number Generator" + default n + select CRYPTO_LIB_SHA256 if CRYPTO + help + The Linux Random Number Generator (LRNG) generates entropy + from different entropy sources. Each entropy source can + be enabled and configured independently. The interrupt + entropy source can be configured to be SP800-90B compliant. + The entire LRNG can be configured to be SP800-90C compliant. + Runtime-switchable cryptographic support is available. + The LRNG delivers significant entropy during boot. + + The LRNG also provides compliance to SP800-90A/B/C. + +menu "Linux Random Number Generator Configuration" + depends on LRNG + +if LRNG + +config LRNG_SHA256 + bool + default y if CRYPTO_LIB_SHA256 + +config LRNG_SHA1 + bool + default y if !CRYPTO_LIB_SHA256 + +config LRNG_COMMON_DEV_IF + bool + +config LRNG_DRNG_ATOMIC + bool + select LRNG_DRNG_CHACHA20 + +config LRNG_SYSCTL + bool + depends on SYSCTL + +config LRNG_RANDOM_IF + bool + default n if RANDOM_DEFAULT_IMPL + default y if !RANDOM_DEFAULT_IMPL + select LRNG_COMMON_DEV_IF + select LRNG_DRNG_ATOMIC + select LRNG_SYSCTL + +menu "LRNG Interfaces" + +config LRNG_KCAPI_IF + tristate "Interface with Kernel Crypto API" + depends on CRYPTO_RNG + help + The LRNG can be registered with the kernel crypto API's + random number generator framework. This offers a random + number generator with the name "lrng" and a priority that + is intended to be higher than the existing RNG + implementations. 
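
For readers unfamiliar with the crypto API RNG framework referenced above, here is a minimal sketch (an illustrative aside, not a hunk of this patch) of an in-kernel consumer pulling bytes from the LRNG once it is registered under the name "lrng". The example_get_lrng_bytes() helper is hypothetical; crypto_alloc_rng(), crypto_rng_get_bytes() and crypto_free_rng() are the standard kernel crypto API RNG calls.

#include <crypto/rng.h>
#include <linux/err.h>

/* Hypothetical consumer: fetch len random bytes from the "lrng" RNG. */
static int example_get_lrng_bytes(u8 *buf, unsigned int len)
{
	struct crypto_rng *rng;
	int ret;

	/* Look up the RNG registered by CONFIG_LRNG_KCAPI_IF. */
	rng = crypto_alloc_rng("lrng", 0, 0);
	if (IS_ERR(rng))
		return PTR_ERR(rng);

	ret = crypto_rng_get_bytes(rng, buf, len);

	crypto_free_rng(rng);
	return ret;
}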
+ +config LRNG_HWRAND_IF + tristate "Interface with Hardware Random Number Generator Framework" + depends on HW_RANDOM + select LRNG_DRNG_ATOMIC + help + The LRNG can be registered with the hardware random number + generator framework. This offers a random number generator + with the name "lrng" that is accessible via the framework. + For example it allows pulling data from the LRNG via the + /dev/hwrng file. + +config LRNG_DEV_IF + bool "Character device file interface" + select LRNG_COMMON_DEV_IF + help + The LRNG can create a character device file that operates + identically to /dev/random including IOCTL, read and write + operations. + +endmenu # "LRNG Interfaces" + +menu "Entropy Source Configuration" + +config LRNG_RUNTIME_ES_CONFIG + bool "Enable runtime configuration of entropy sources" + help + When enabling this option, the LRNG provides the mechanism + allowing to alter the entropy rate of each entropy source + during boot time and runtime. + + Each entropy source allows its entropy rate changed with + a kernel command line option. When not providing any + option, the default specified during kernel compilation + is applied. + +comment "Common Timer-based Entropy Source Configuration" + +config LRNG_IRQ_DFLT_TIMER_ES + bool + +config LRNG_SCHED_DFLT_TIMER_ES + bool + +config LRNG_TIMER_COMMON + bool + +choice + prompt "Default Timer-based Entropy Source" + default LRNG_IRQ_DFLT_TIMER_ES + depends on LRNG_TIMER_COMMON + help + Select the timer-based entropy source that is credited + with entropy. The other timer-based entropy sources may + be operational and provide data, but are credited with no + entropy. + + config LRNG_IRQ_DFLT_TIMER_ES + bool "Interrupt Entropy Source" + depends on LRNG_IRQ + help + The interrupt entropy source is selected as a timer-based + entropy source to provide entropy. + + config LRNG_SCHED_DFLT_TIMER_ES + bool "Scheduler Entropy Source" + depends on LRNG_SCHED + help + The scheduler entropy source is selected as timer-based + entropy source to provide entropy. +endchoice + +choice + prompt "LRNG Entropy Collection Pool Size" + default LRNG_COLLECTION_SIZE_1024 + depends on LRNG_TIMER_COMMON + help + Select the size of the LRNG entropy collection pool + storing data for the interrupt as well as the scheduler + entropy sources without performing a compression + operation. The larger the collection size is, the faster + the average interrupt handling will be. The collection + size represents the number of bytes of the per-CPU memory + used to batch up entropy event data. + + The default value is good for regular operations. Choose + larger sizes for servers that have no memory limitations. + If runtime memory is precious, choose a smaller size. + + The collection size is unrelated to the entropy rate + or the amount of entropy the LRNG can process. 
+ + config LRNG_COLLECTION_SIZE_32 + depends on LRNG_CONTINUOUS_COMPRESSION_ENABLED + depends on !LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION + depends on !LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "32 interrupt events" + + config LRNG_COLLECTION_SIZE_256 + depends on !LRNG_OVERSAMPLE_ENTROPY_SOURCES + bool "256 interrupt events" + + config LRNG_COLLECTION_SIZE_512 + bool "512 interrupt events" + + config LRNG_COLLECTION_SIZE_1024 + bool "1024 interrupt events (default)" + + config LRNG_COLLECTION_SIZE_2048 + bool "2048 interrupt events" + + config LRNG_COLLECTION_SIZE_4096 + bool "4096 interrupt events" + + config LRNG_COLLECTION_SIZE_8192 + bool "8192 interrupt events" + +endchoice + +config LRNG_COLLECTION_SIZE + int + default 32 if LRNG_COLLECTION_SIZE_32 + default 256 if LRNG_COLLECTION_SIZE_256 + default 512 if LRNG_COLLECTION_SIZE_512 + default 1024 if LRNG_COLLECTION_SIZE_1024 + default 2048 if LRNG_COLLECTION_SIZE_2048 + default 4096 if LRNG_COLLECTION_SIZE_4096 + default 8192 if LRNG_COLLECTION_SIZE_8192 + +config LRNG_HEALTH_TESTS + bool "Enable internal entropy source online health tests" + depends on LRNG_TIMER_COMMON + help + The online health tests applied to the interrupt entropy + source and to the scheduler entropy source to validate + the noise source at runtime for fatal errors. These tests + include SP800-90B compliant tests which are invoked if + the system is booted with fips=1. In case of fatal errors + during active SP800-90B tests, the issue is logged and + the noise data is discarded. These tests are required for + full compliance of the interrupt entropy source with + SP800-90B. + + If both, the scheduler and the interrupt entropy sources, + are enabled, the health tests for both are applied + independent of each other. + + If unsure, say Y. + +config LRNG_RCT_BROKEN + bool "SP800-90B RCT with dangerous low cutoff value" + depends on LRNG_HEALTH_TESTS + depends on BROKEN + default n + help + This option enables a dangerously low SP800-90B repetitive + count test (RCT) cutoff value which makes it very likely + that the RCT is triggered to raise a self test failure. + + This option is ONLY intended for developers wanting to + test the effectiveness of the SP800-90B RCT health test. + + If unsure, say N. + +config LRNG_APT_BROKEN + bool "SP800-90B APT with dangerous low cutoff value" + depends on LRNG_HEALTH_TESTS + depends on BROKEN + default n + help + This option enables a dangerously low SP800-90B adaptive + proportion test (APT) cutoff value which makes it very + likely that the APT is triggered to raise a self test + failure. + + This option is ONLY intended for developers wanting to + test the effectiveness of the SP800-90B APT health test. + + If unsure, say N. + +# Default taken from SP800-90B sec 4.4.1 - significance level 2^-30 +config LRNG_RCT_CUTOFF + int + default 31 if !LRNG_RCT_BROKEN + default 1 if LRNG_RCT_BROKEN + +# Default taken from SP800-90B sec 4.4.2 - significance level 2^-30 +config LRNG_APT_CUTOFF + int + default 325 if !LRNG_APT_BROKEN + default 32 if LRNG_APT_BROKEN + +comment "Interrupt Entropy Source" + +config LRNG_IRQ + bool "Enable Interrupt Entropy Source as LRNG Seed Source" + default y + depends on !RANDOM_DEFAULT_IMPL + select LRNG_TIMER_COMMON + help + The LRNG models an entropy source based on the timing of the + occurrence of interrupts. Enable this option to enable this + IRQ entropy source. + + The IRQ entropy source is triggered every time an interrupt + arrives and thus causes the interrupt handler to execute + slightly longer. 
Disabling the IRQ entropy source implies + that the performance penalty on the interrupt handler added + by the LRNG is eliminated. Yet, this entropy source is + considered to be an internal entropy source of the LRNG. + Thus, only disable it if you ensured that other entropy + sources are available that supply the LRNG with entropy. + + If you disable the IRQ entropy source, you MUST ensure + one or more entropy sources collectively have the + capability to deliver sufficient entropy with one invocation + at a rate compliant to the security strength of the DRNG + (usually 256 bits of entropy). In addition, if those + entropy sources do not deliver sufficient entropy during + first request, the reseed must be triggered from user + space or kernel space when sufficient entropy is considered + to be present. + + If unsure, say Y. + +choice + prompt "Continuous entropy compression boot time setting" + default LRNG_CONTINUOUS_COMPRESSION_ENABLED + depends on LRNG_IRQ + help + Select the default behavior of the interrupt entropy source + continuous compression operation. + + The LRNG IRQ ES collects entropy data during each interrupt. + For performance reasons, a amount of entropy data defined by + the LRNG entropy collection pool size is concatenated into + an array. When that array is filled up, a hash is calculated + to compress the entropy. That hash is calculated in + interrupt context. + + In case such hash calculation in interrupt context is deemed + too time-consuming, the continuous compression operation + can be disabled. If disabled, the collection of entropy will + not trigger a hash compression operation in interrupt context. + The compression happens only when the DRNG is reseeded which is + in process context. This implies that old entropy data + collected after the last DRNG-reseed is overwritten with newer + entropy data once the collection pool is full instead of + retaining its entropy with the compression operation. + + config LRNG_CONTINUOUS_COMPRESSION_ENABLED + bool "Enable continuous compression (default)" + + config LRNG_CONTINUOUS_COMPRESSION_DISABLED + bool "Disable continuous compression" + +endchoice + +config LRNG_ENABLE_CONTINUOUS_COMPRESSION + bool + default y if LRNG_CONTINUOUS_COMPRESSION_ENABLED + default n if LRNG_CONTINUOUS_COMPRESSION_DISABLED + +config LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION + bool "Runtime-switchable continuous entropy compression" + depends on LRNG_IRQ + help + Per default, the interrupt entropy source continuous + compression operation behavior is hard-wired into the kernel. + Enable this option to allow it to be configurable at boot time. + + To modify the default behavior of the continuous + compression operation, use the kernel command line option + of lrng_sw_noise.lrng_pcpu_continuous_compression. + + If unsure, say N. + +config LRNG_IRQ_ENTROPY_RATE + int "Interrupt Entropy Source Entropy Rate" + depends on LRNG_IRQ + range 256 4294967295 if LRNG_IRQ_DFLT_TIMER_ES + range 4294967295 4294967295 if !LRNG_IRQ_DFLT_TIMER_ES + default 256 if LRNG_IRQ_DFLT_TIMER_ES + default 4294967295 if !LRNG_IRQ_DFLT_TIMER_ES + help + The LRNG will collect the configured number of interrupts to + obtain 256 bits of entropy. This value can be set to any between + 256 and 4294967295. The LRNG guarantees that this value is not + lower than 256. This lower limit implies that one interrupt event + is credited with one bit of entropy. This value is subject to the + increase by the oversampling factor, if no high-resolution timer + is found. 
+ + In order to effectively disable the interrupt entropy source, + the option has to be set to 4294967295. In this case, the + interrupt entropy source will still deliver data but without + being credited with entropy. + +comment "Jitter RNG Entropy Source" + +config LRNG_JENT + bool "Enable Jitter RNG as LRNG Seed Source" + depends on CRYPTO + select CRYPTO_JITTERENTROPY + help + The LRNG may use the Jitter RNG as entropy source. Enabling + this option enables the use of the Jitter RNG. Its default + entropy level is 16 bits of entropy per 256 data bits delivered + by the Jitter RNG. This entropy level can be changed at boot + time or at runtime with the lrng_base.jitterrng configuration + variable. + +config LRNG_JENT_ENTROPY_RATE + int "Jitter RNG Entropy Source Entropy Rate" + depends on LRNG_JENT + range 0 256 + default 16 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the Jitter RNG entropy source. The + LRNG enforces the limit that this value must be in the range + between 0 and 256. + + When configuring this value to 0, the Jitter RNG entropy source + will provide 256 bits of data without being credited to contain + entropy. + +comment "CPU Entropy Source" + +config LRNG_CPU + bool "Enable CPU Entropy Source as LRNG Seed Source" + default y + help + Current CPUs commonly contain entropy sources which can be + used to seed the LRNG. For example, the Intel RDSEED + instruction, or the POWER DARN instruction will be sourced + to seed the LRNG if this option is enabled. + + Note, if this option is enabled and the underlying CPU + does not offer such entropy source, the LRNG will automatically + detect this and ignore the hardware. + +config LRNG_CPU_FULL_ENT_MULTIPLIER + int + default 1 if !LRNG_TEST_CPU_ES_COMPRESSION + default 123 if LRNG_TEST_CPU_ES_COMPRESSION + +config LRNG_CPU_ENTROPY_RATE + int "CPU Entropy Source Entropy Rate" + depends on LRNG_CPU + range 0 256 + default 8 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the CPU entropy source. The LRNG + enforces the limit that this value must be in the range between + 0 and 256. + + When configuring this value to 0, the CPU entropy source will + provide 256 bits of data without being credited to contain + entropy. + + Note, this option is overwritten when the option + CONFIG_RANDOM_TRUST_CPU is set. + +comment "Scheduler Entropy Source" + +config LRNG_SCHED + bool "Enable Scheduer Entropy Source as LRNG Seed Source" + select LRNG_TIMER_COMMON + help + The LRNG models an entropy source based on the timing of the + occurrence of scheduler-triggered context switches. Enable + this option to enable this scheduler entropy source. + + The scheduler entropy source is triggered every time a + context switch is triggered thus causes the scheduler to + execute slightly longer. Disabling the scheduler entropy + source implies that the performance penalty on the scheduler + added by the LRNG is eliminated. Yet, this entropy source is + considered to be an internal entropy source of the LRNG. + Thus, only disable it if you ensured that other entropy + sources are available that supply the LRNG with entropy. + + If you disable the scheduler entropy source, you MUST + ensure one or more entropy sources collectively have the + capability to deliver sufficient entropy with one invocation + at a rate compliant to the security strength of the DRNG + (usually 256 bits of entropy). 
In addition, if those + entropy sources do not deliver sufficient entropy during + first request, the reseed must be triggered from user + space or kernel space when sufficient entropy is considered + to be present. + + If unsure, say Y. + +config LRNG_SCHED_ENTROPY_RATE + int "Scheduler Entropy Source Entropy Rate" + depends on LRNG_SCHED + range 256 4294967295 if LRNG_SCHED_DFLT_TIMER_ES + range 4294967295 4294967295 if !LRNG_SCHED_DFLT_TIMER_ES + default 256 if LRNG_SCHED_DFLT_TIMER_ES + default 4294967295 if !LRNG_SCHED_DFLT_TIMER_ES + help + The LRNG will collect the configured number of context switches + triggered by the scheduler to obtain 256 bits of entropy. This + value can be set to any between 256 and 4294967295. The LRNG + guarantees that this value is not lower than 256. This lower + limit implies that one interrupt event is credited with one bit + of entropy. This value is subject to the increase by the + oversampling factor, if no high-resolution timer is found. + + In order to effectively disable the scheduler entropy source, + the option has to be set to 4294967295. In this case, the + scheduler entropy source will still deliver data but without + being credited with entropy. + +comment "Kernel RNG Entropy Source" + +config LRNG_KERNEL_RNG + bool "Enable Kernel RNG as LRNG Seed Source" + depends on RANDOM_DEFAULT_IMPL + help + The LRNG may use the kernel RNG (random.c) as entropy + source. + +config LRNG_KERNEL_RNG_ENTROPY_RATE + int "Kernel RNG Entropy Source Entropy Rate" + depends on LRNG_KERNEL_RNG + range 0 256 + default 256 + help + The option defines the amount of entropy the LRNG applies to 256 + bits of data obtained from the kernel RNG entropy source. The + LRNG enforces the limit that this value must be in the range + between 0 and 256. + + When configuring this value to 0, the kernel RNG entropy source + will provide 256 bits of data without being credited to contain + entropy. + + Note: This value is set to 0 automatically when booting the + kernel in FIPS mode (with fips=1 kernel command line option). + This is due to the fact that random.c is not SP800-90B + compliant. + +endmenu # "Entropy Source Configuration" + +config LRNG_DRNG_CHACHA20 + tristate + +config LRNG_DRBG + tristate + depends on CRYPTO + select CRYPTO_DRBG_MENU + +config LRNG_DRNG_KCAPI + tristate + depends on CRYPTO + select CRYPTO_RNG + +config LRNG_SWITCH + bool + +menuconfig LRNG_SWITCH_HASH + bool "Support conditioning hash runtime switching" + select LRNG_SWITCH + help + The LRNG uses a default message digest. With this + configuration option other message digests can be selected + and loaded at runtime. + +if LRNG_SWITCH_HASH + +config LRNG_HASH_KCAPI + tristate "Kernel crypto API hashing support for LRNG" + select CRYPTO_HASH + select CRYPTO_SHA512 + help + Enable the kernel crypto API support for entropy compression + and conditioning functions. + +endif # LRNG_SWITCH_HASH + +menuconfig LRNG_SWITCH_DRNG + bool "Support DRNG runtime switching" + select LRNG_SWITCH + help + The LRNG uses a default DRNG With this configuration + option other DRNGs or message digests can be selected and + loaded at runtime. + +if LRNG_SWITCH_DRNG + +config LRNG_SWITCH_DRNG_CHACHA20 + tristate "ChaCha20-based DRNG support for LRNG" + depends on !LRNG_DFLT_DRNG_CHACHA20 + select LRNG_DRNG_CHACHA20 + help + Enable the ChaCha20-based DRNG. This DRNG implementation + does not depend on the kernel crypto API presence. 
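
To make "runtime switching" concrete, the following is a sketch (not part of this patch) of the skeleton a switchable DRNG backend follows. The shape mirrors lrng_drng_chacha20.c and lrng_drng_drbg.c further down in this series, which register their callbacks through lrng_set_drng_cb(); the my_drng_* bodies are placeholders and the quoted header name is an assumption.

#include <linux/module.h>
#include <linux/err.h>

#include "lrng_drng_mgr.h"	/* assumed to declare lrng_set_drng_cb() */

static void *my_drng_alloc(u32 sec_strength)
{
	/* Allocate and initialize the DRNG state here. */
	return ERR_PTR(-EOPNOTSUPP);
}

static void my_drng_dealloc(void *drng)
{
	/* Zeroize and free the DRNG state here. */
}

static int my_drng_seed(void *drng, const u8 *inbuf, u32 inbuflen)
{
	/* Mix inbuflen bytes of seed material into the DRNG state here. */
	return 0;
}

static int my_drng_generate(void *drng, u8 *outbuf, u32 outbuflen)
{
	/* Generate outbuflen bytes and return the number produced. */
	return -EOPNOTSUPP;
}

static const char *my_drng_name(void)
{
	return "example DRNG";
}

static const struct lrng_drng_cb my_drng_cb = {
	.drng_name	= my_drng_name,
	.drng_alloc	= my_drng_alloc,
	.drng_dealloc	= my_drng_dealloc,
	.drng_seed	= my_drng_seed,
	.drng_generate	= my_drng_generate,
};

static int __init my_drng_init(void)
{
	/* Take over DRNG duty from the currently active backend. */
	return lrng_set_drng_cb(&my_drng_cb);
}

static void __exit my_drng_exit(void)
{
	/* Hand control back to the previous/default backend. */
	lrng_set_drng_cb(NULL);
}

late_initcall(my_drng_init);
module_exit(my_drng_exit);
MODULE_LICENSE("Dual BSD/GPL");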
+ +config LRNG_SWITCH_DRBG + tristate "SP800-90A support for the LRNG" + depends on !LRNG_DFLT_DRNG_DRBG + select LRNG_DRBG + help + Enable the SP800-90A DRBG support for the LRNG. Once the + module is loaded, output from /dev/random, /dev/urandom, + getrandom(2), or get_random_bytes_full is provided by a DRBG. + +config LRNG_SWITCH_DRNG_KCAPI + tristate "Kernel Crypto API support for the LRNG" + depends on !LRNG_DFLT_DRNG_KCAPI + depends on !LRNG_SWITCH_DRBG + select LRNG_DRNG_KCAPI + help + Enable the support for generic pseudo-random number + generators offered by the kernel crypto API with the + LRNG. Once the module is loaded, output from /dev/random, + /dev/urandom, getrandom(2), or get_random_bytes is + provided by the selected kernel crypto API RNG. + +endif # LRNG_SWITCH_DRNG + +choice + prompt "LRNG Default DRNG" + default LRNG_DFLT_DRNG_CHACHA20 + help + Select the default deterministic random number generator + that is used by the LRNG. When enabling the switchable + cryptographic mechanism support, this DRNG can be + replaced at runtime. + + config LRNG_DFLT_DRNG_CHACHA20 + bool "ChaCha20-based DRNG" + select LRNG_DRNG_CHACHA20 + + config LRNG_DFLT_DRNG_DRBG + bool "SP800-90A DRBG" + select LRNG_DRBG + + config LRNG_DFLT_DRNG_KCAPI + bool "Kernel Crypto API DRNG" + select LRNG_DRNG_KCAPI +endchoice + +menuconfig LRNG_TESTING_MENU + bool "LRNG testing interfaces" + depends on DEBUG_FS + help + Enable one or more of the following test interfaces. + + If unsure, say N. + +if LRNG_TESTING_MENU + +config LRNG_TESTING + bool + +config LRNG_TESTING_RECORDING + bool + +comment "Interrupt Entropy Source Test Interfaces" + +config LRNG_RAW_HIRES_ENTROPY + bool "Interface to obtain raw unprocessed IRQ noise source data" + default y + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned high resolution time stamp noise that + is collected by the LRNG for statistical analysis. Extracted + noise data is not used to seed the LRNG. + + The raw noise data can be obtained using the lrng_raw_hires + debugfs file. Using the option lrng_testing.boot_raw_hires_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_JIFFIES_ENTROPY + bool "Entropy test interface to Jiffies of IRQ noise source" + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned Jiffies that is collected by + the LRNG for statistical analysis. This data is used for + seeding the LRNG if a high-resolution time stamp is not + available. If a high-resolution time stamp is detected, + the Jiffies value is not collected by the LRNG and no + data is provided via the test interface. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_jiffies + debugfs file. Using the option lrng_testing.boot_raw_jiffies_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_IRQ_ENTROPY + bool "Entropy test interface to IRQ number noise source" + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt number that is collected by + the LRNG for statistical analysis. Extracted noise data is + not used to seed the random number generator. 
+ + The raw noise data can be obtained using the lrng_raw_irq + debugfs file. Using the option lrng_testing.boot_raw_irq_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_RETIP_ENTROPY + bool "Entropy test interface to RETIP value of IRQ noise source" + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned return instruction pointer value + that is collected by the LRNG for statistical analysis. + Extracted noise data is not used to seed the random number + generator. + + The raw noise data can be obtained using the lrng_raw_retip + debugfs file. Using the option lrng_testing.boot_raw_retip_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_REGS_ENTROPY + bool "Entropy test interface to IRQ register value noise source" + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned interrupt register value that is + collected by the LRNG for statistical analysis. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the lrng_raw_regs + debugfs file. Using the option lrng_testing.boot_raw_regs_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_ARRAY + bool "Test interface to LRNG raw entropy IRQ storage array" + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw noise data that is collected by the LRNG + in the per-CPU array for statistical analysis. The purpose + of this interface is to verify that the array handling code + truly only concatenates data and provides the same entropy + rate as the raw unconditioned noise source when assessing + the collected data byte-wise. + + The data can be obtained using the lrng_raw_array debugfs + file. Using the option lrng_testing.boot_raw_array=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_IRQ_PERF + bool "LRNG interrupt entropy source performance monitor" + depends on LRNG_IRQ + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + With this option, the performance monitor of the LRNG + interrupt handling code is enabled. The file provides + the execution time of the interrupt handler in + cycles. + + The interrupt performance data can be obtained using + the lrng_irq_perf debugfs file. Using the option + lrng_testing.boot_irq_perf=1 the performance data of + the first 1000 entropy events since boot can be sampled. + +comment "Scheduler Entropy Source Test Interfaces" + +config LRNG_RAW_SCHED_HIRES_ENTROPY + bool "Interface to obtain raw unprocessed scheduler noise source data" + depends on LRNG_SCHED + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned high resolution time stamp noise that + is collected by the LRNG for the Scheduler-based noise source + for statistical analysis. Extracted noise data is not used to + seed the LRNG. + + The raw noise data can be obtained using the lrng_raw_sched_hires + debugfs file. Using the option + lrng_testing.boot_raw_sched_hires_test=1 the raw noise of the + first 1000 entropy events since boot can be sampled. 
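
As a usage illustration for the raw-entropy test interfaces above, here is a small user-space sketch (not part of this patch) that dumps one of the named debugfs files. The debugfs mount point, the directory the file lives in, and the record format are assumptions; the help texts only name the files (lrng_raw_hires, lrng_raw_sched_hires, and so on), so the program simply copies the raw bytes to stdout for offline statistical analysis.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	/* Path is an assumption; pass the real location as argv[1]. */
	const char *path = argc > 1 ? argv[1]
				    : "/sys/kernel/debug/lrng_raw_hires";
	unsigned char buf[4096];
	ssize_t n;
	int fd;

	fd = open(path, O_RDONLY);	/* requires a privileged process */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, (size_t)n, stdout);	/* raw samples */
	close(fd);
	return 0;
}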
+ +config LRNG_RAW_SCHED_PID_ENTROPY + bool "Entropy test interface to PID value" + depends on LRNG_SCHED + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned PID value that is collected by the + LRNG for statistical analysis. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the + lrng_raw_sched_pid debugfs file. Using the option + lrng_testing.boot_raw_sched_pid_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_RAW_SCHED_START_TIME_ENTROPY + bool "Entropy test interface to task start time value" + depends on LRNG_SCHED + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned task start time value that is collected + by the LRNG for statistical analysis. Extracted noise + data is not used to seed the random number generator. + + The raw noise data can be obtained using the + lrng_raw_sched_starttime debugfs file. Using the option + lrng_testing.boot_raw_sched_starttime_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + + +config LRNG_RAW_SCHED_NVCSW_ENTROPY + bool "Entropy test interface to task context switch numbers" + depends on LRNG_SCHED + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + The test interface allows a privileged process to capture + the raw unconditioned task numbers of context switches that + are collected by the LRNG for statistical analysis. Extracted + noise data is not used to seed the random number generator. + + The raw noise data can be obtained using the + lrng_raw_sched_nvcsw debugfs file. Using the option + lrng_testing.boot_raw_sched_nvcsw_test=1 + the raw noise of the first 1000 entropy events since boot + can be sampled. + +config LRNG_SCHED_PERF + bool "LRNG scheduler entropy source performance monitor" + depends on LRNG_SCHED + select LRNG_TESTING + select LRNG_TESTING_RECORDING + help + With this option, the performance monitor of the LRNG + scheduler event handling code is enabled. The file provides + the execution time of the interrupt handler in cycles. + + The scheduler performance data can be obtained using + the lrng_sched_perf debugfs file. Using the option + lrng_testing.boot_sched_perf=1 the performance data of + the first 1000 entropy events since boot can be sampled. + +comment "Auxiliary Test Interfaces" + +config LRNG_ACVT_HASH + bool "Enable LRNG ACVT Hash interface" + select LRNG_TESTING + help + With this option, the LRNG built-in hash function used for + auxiliary pool management and prior to switching the + cryptographic backends is made available for ACVT. The + interface allows writing of the data to be hashed + into the interface. The read operation triggers the hash + operation to generate message digest. + + The ACVT interface is available with the lrng_acvt_hash + debugfs file. + +config LRNG_RUNTIME_MAX_WO_RESEED_CONFIG + bool "Enable runtime configuration of max reseed threshold" + help + When enabling this option, the LRNG provides an interface + allowing the setting of the maximum number of DRNG generate + operations without a reseed that has full entropy. The + interface is lrng_drng.max_wo_reseed. 
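
To tie the LRNG_ACVT_HASH description a few entries above to concrete steps, here is a hypothetical user-space sketch of the write-then-read protocol it documents: write the message to be hashed into the lrng_acvt_hash debugfs file, then read back the digest. The file path, the privilege requirement, the SHA-256 digest length and the exact open/read/write sequence are assumptions, not confirmed by this patch.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/lrng_acvt_hash"; /* assumed */
	const char msg[] = "test vector input";
	unsigned char digest[32];	/* SHA-256 size, assumed */
	ssize_t n;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0) {	/* submit data to hash */
		perror("write");
		close(fd);
		return 1;
	}
	n = read(fd, digest, sizeof(digest));	/* read triggers the hash */
	for (ssize_t i = 0; i < n; i++)
		printf("%02x", digest[i]);
	printf("\n");
	close(fd);
	return 0;
}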
+ +config LRNG_TEST_CPU_ES_COMPRESSION + bool "Force CPU ES compression operation" + help + When enabling this option, the CPU ES compression operation + is forced by setting an arbitrary value > 1 for the data + multiplier even when the CPU ES would deliver full entropy. + This allows testing of the compression operation. It + therefore forces to pull more data from the CPU ES + than what may be required. + +endif #LRNG_TESTING_MENU + +config LRNG_SELFTEST + bool "Enable power-on and on-demand self-tests" + help + The power-on self-tests are executed during boot time + covering the ChaCha20 DRNG, the hash operation used for + processing the entropy pools and the auxiliary pool, and + the time stamp management of the LRNG. + + The on-demand self-tests are triggered by writing any + value into the SysFS file selftest_status. At the same + time, when reading this file, the test status is + returned. A zero indicates that all tests were executed + successfully. + + If unsure, say Y. + +if LRNG_SELFTEST + +config LRNG_SELFTEST_PANIC + bool "Panic the kernel upon self-test failure" + help + If the option is enabled, the kernel is terminated if an + LRNG power-on self-test failure is detected. + +endif # LRNG_SELFTEST + +endif # LRNG + +endmenu # LRNG diff --git a/drivers/char/lrng/Makefile b/drivers/char/lrng/Makefile new file mode 100644 index 0000000000000..f61fc40f4620d --- /dev/null +++ b/drivers/char/lrng/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Entropy Source and DRNG Manager. +# + +obj-y += lrng_es_mgr.o lrng_drng_mgr.o \ + lrng_es_aux.o +obj-$(CONFIG_LRNG_SHA256) += lrng_sha256.o +obj-$(CONFIG_LRNG_SHA1) += lrng_sha1.o + +obj-$(CONFIG_SYSCTL) += lrng_proc.o +obj-$(CONFIG_LRNG_SYSCTL) += lrng_sysctl.o +obj-$(CONFIG_NUMA) += lrng_numa.o + +obj-$(CONFIG_LRNG_SWITCH) += lrng_switch.o +obj-$(CONFIG_LRNG_HASH_KCAPI) += lrng_hash_kcapi.o +obj-$(CONFIG_LRNG_DRNG_CHACHA20) += lrng_drng_chacha20.o +obj-$(CONFIG_LRNG_DRBG) += lrng_drng_drbg.o +obj-$(CONFIG_LRNG_DRNG_KCAPI) += lrng_drng_kcapi.o +obj-$(CONFIG_LRNG_DRNG_ATOMIC) += lrng_drng_atomic.o + +obj-$(CONFIG_LRNG_TIMER_COMMON) += lrng_es_timer_common.o +obj-$(CONFIG_LRNG_IRQ) += lrng_es_irq.o +obj-$(CONFIG_LRNG_KERNEL_RNG) += lrng_es_krng.o +obj-$(CONFIG_LRNG_SCHED) += lrng_es_sched.o +obj-$(CONFIG_LRNG_CPU) += lrng_es_cpu.o +obj-$(CONFIG_LRNG_JENT) += lrng_es_jent.o + +obj-$(CONFIG_LRNG_HEALTH_TESTS) += lrng_health.o +obj-$(CONFIG_LRNG_TESTING) += lrng_testing.o +obj-$(CONFIG_LRNG_SELFTEST) += lrng_selftest.o + +obj-$(CONFIG_LRNG_COMMON_DEV_IF) += lrng_interface_dev_common.o +obj-$(CONFIG_LRNG_RANDOM_IF) += lrng_interface_random_user.o \ + lrng_interface_random_kernel.o \ + lrng_interface_aux.o +obj-$(CONFIG_LRNG_KCAPI_IF) += lrng_interface_kcapi.o +obj-$(CONFIG_LRNG_DEV_IF) += lrng_interface_dev.o +obj-$(CONFIG_LRNG_HWRAND_IF) += lrng_interface_hwrand.o diff --git a/drivers/char/lrng/lrng_definitions.h b/drivers/char/lrng/lrng_definitions.h new file mode 100644 index 0000000000000..50f87c40f701a --- /dev/null +++ b/drivers/char/lrng/lrng_definitions.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_DEFINITIONS_H +#define _LRNG_DEFINITIONS_H + +#include +#include +#include + +/*************************** General LRNG parameter ***************************/ + +/* + * Specific settings for different use cases + */ +#ifdef CONFIG_CRYPTO_FIPS +# define LRNG_OVERSAMPLE_ES_BITS 64 +# define 
LRNG_SEED_BUFFER_INIT_ADD_BITS 128 +#else /* CONFIG_CRYPTO_FIPS */ +# define LRNG_OVERSAMPLE_ES_BITS 0 +# define LRNG_SEED_BUFFER_INIT_ADD_BITS 0 +#endif /* CONFIG_CRYPTO_FIPS */ + +/* Security strength of LRNG -- this must match DRNG security strength */ +#define LRNG_DRNG_SECURITY_STRENGTH_BYTES 32 +#define LRNG_DRNG_SECURITY_STRENGTH_BITS (LRNG_DRNG_SECURITY_STRENGTH_BYTES * 8) +#define LRNG_DRNG_INIT_SEED_SIZE_BITS \ + (LRNG_DRNG_SECURITY_STRENGTH_BITS + LRNG_SEED_BUFFER_INIT_ADD_BITS) +#define LRNG_DRNG_INIT_SEED_SIZE_BYTES (LRNG_DRNG_INIT_SEED_SIZE_BITS >> 3) + +/* + * SP800-90A defines a maximum request size of 1<<16 bytes. The given value is + * considered a safer margin. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_MAX_REQSIZE (1<<12) + +/* + * SP800-90A defines a maximum number of requests between reseeds of 2^48. + * The given value is considered a much safer margin, balancing requests for + * frequent reseeds with the need to conserve entropy. This value MUST NOT be + * larger than INT_MAX because it is used in an atomic_t. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_RESEED_THRESH (1<<20) + +/* + * Maximum DRNG generation operations without reseed having full entropy + * This value defines the absolute maximum value of DRNG generation operations + * without a reseed holding full entropy. LRNG_DRNG_RESEED_THRESH is the + * threshold when a new reseed is attempted. But it is possible that this fails + * to deliver full entropy. In this case the DRNG will continue to provide data + * even though it was not reseeded with full entropy. To avoid in the extreme + * case that no reseed is performed for too long, this threshold is enforced. + * If that absolute low value is reached, the LRNG is marked as not operational. + * + * This value is allowed to be changed. + */ +#define LRNG_DRNG_MAX_WITHOUT_RESEED (1<<30) + +/* + * Min required seed entropy is 128 bits covering the minimum entropy + * requirement of SP800-131A and the German BSI's TR02102. + * + * This value is allowed to be changed. + */ +#define LRNG_FULL_SEED_ENTROPY_BITS LRNG_DRNG_SECURITY_STRENGTH_BITS +#define LRNG_MIN_SEED_ENTROPY_BITS 128 +#define LRNG_INIT_ENTROPY_BITS 32 + +/* + * Wakeup value + * + * This value is allowed to be changed but must not be larger than the + * digest size of the hash operation used update the aux_pool. + */ +#ifdef CONFIG_LRNG_SHA256 +# define LRNG_ATOMIC_DIGEST_SIZE SHA256_DIGEST_SIZE +#else +# define LRNG_ATOMIC_DIGEST_SIZE SHA1_DIGEST_SIZE +#endif +#define LRNG_WRITE_WAKEUP_ENTROPY LRNG_ATOMIC_DIGEST_SIZE + +/* + * If the switching support is configured, we must provide support up to + * the largest digest size. Without switching support, we know it is only + * the built-in digest size. + */ +#ifdef CONFIG_LRNG_SWITCH +# define LRNG_MAX_DIGESTSIZE 64 +#else +# define LRNG_MAX_DIGESTSIZE LRNG_ATOMIC_DIGEST_SIZE +#endif + +/* + * Oversampling factor of timer-based events to obtain + * LRNG_DRNG_SECURITY_STRENGTH_BYTES. This factor is used when a + * high-resolution time stamp is not available. In this case, jiffies and + * register contents are used to fill the entropy pool. These noise sources + * are much less entropic than the high-resolution timer. The entropy content + * is the entropy content assumed with LRNG_[IRQ|SCHED]_ENTROPY_BITS divided by + * LRNG_ES_OVERSAMPLING_FACTOR. + * + * This value is allowed to be changed. 
+ */ +#define LRNG_ES_OVERSAMPLING_FACTOR 10 + +/* Alignmask that is intended to be identical to CRYPTO_MINALIGN */ +#define LRNG_KCAPI_ALIGN ARCH_KMALLOC_MINALIGN + +/* + * This definition must provide a buffer that is equal to SHASH_DESC_ON_STACK + * as it will be casted into a struct shash_desc. + */ +#define LRNG_POOL_SIZE (sizeof(struct shash_desc) + HASH_MAX_DESCSIZE) + +/****************************** Helper code ***********************************/ + +static inline u32 lrng_fast_noise_entropylevel(u32 ent_bits, u32 requested_bits) +{ + /* Obtain entropy statement */ + ent_bits = ent_bits * requested_bits / LRNG_DRNG_SECURITY_STRENGTH_BITS; + /* Cap entropy to buffer size in bits */ + ent_bits = min_t(u32, ent_bits, requested_bits); + return ent_bits; +} + +/* Convert entropy in bits into nr. of events with the same entropy content. */ +static inline u32 lrng_entropy_to_data(u32 entropy_bits, u32 entropy_rate) +{ + return ((entropy_bits * entropy_rate) / + LRNG_DRNG_SECURITY_STRENGTH_BITS); +} + +/* Convert number of events into entropy value. */ +static inline u32 lrng_data_to_entropy(u32 num, u32 entropy_rate) +{ + return ((num * LRNG_DRNG_SECURITY_STRENGTH_BITS) / + entropy_rate); +} + +static inline u32 atomic_read_u32(atomic_t *v) +{ + return (u32)atomic_read(v); +} + +#endif /* _LRNG_DEFINITIONS_H */ diff --git a/drivers/char/lrng/lrng_drng_atomic.c b/drivers/char/lrng/lrng_drng_atomic.c new file mode 100644 index 0000000000000..e3a8fece9935e --- /dev/null +++ b/drivers/char/lrng/lrng_drng_atomic.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG DRNG for atomic contexts + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_drng_atomic.h" +#include "lrng_drng_chacha20.h" +#include "lrng_es_aux.h" +#include "lrng_es_mgr.h" +#include "lrng_sha.h" + +static struct chacha20_state chacha20_atomic = { + LRNG_CC20_INIT_RFC7539(.block) +}; + +/* + * DRNG usable in atomic context. This DRNG will always use the ChaCha20 + * DRNG. It will never benefit from a DRNG switch like the "regular" DRNG. If + * there was no DRNG switch, the atomic DRNG is identical to the "regular" DRNG. + * + * The reason for having this is due to the fact that DRNGs other than + * the ChaCha20 DRNG may sleep. + */ +static struct lrng_drng lrng_drng_atomic = { + LRNG_DRNG_STATE_INIT(lrng_drng_atomic, + &chacha20_atomic, NULL, + &lrng_cc20_drng_cb, &lrng_sha_hash_cb), + .spin_lock = __SPIN_LOCK_UNLOCKED(lrng_drng_atomic.spin_lock) +}; + +void lrng_drng_atomic_reset(void) +{ + unsigned long flags; + + spin_lock_irqsave(&lrng_drng_atomic.spin_lock, flags); + lrng_drng_reset(&lrng_drng_atomic); + spin_unlock_irqrestore(&lrng_drng_atomic.spin_lock, flags); +} + +void lrng_drng_atomic_force_reseed(void) +{ + lrng_drng_atomic.force_reseed = lrng_drng_atomic.fully_seeded; +} + +static bool lrng_drng_atomic_must_reseed(struct lrng_drng *drng) +{ + return (!drng->fully_seeded || + atomic_read(&lrng_drng_atomic.requests) == 0 || + drng->force_reseed || + time_after(jiffies, + drng->last_seeded + lrng_drng_reseed_max_time * HZ)); +} + +void lrng_drng_atomic_seed_drng(struct lrng_drng *regular_drng) +{ + u8 seedbuf[LRNG_DRNG_SECURITY_STRENGTH_BYTES] + __aligned(LRNG_KCAPI_ALIGN); + int ret; + + if (!lrng_drng_atomic_must_reseed(&lrng_drng_atomic)) + return; + + /* + * Reseed atomic DRNG another DRNG "regular" while this regular DRNG + * is reseeded. 
Therefore, this code operates in non-atomic context and + * thus can use the lrng_drng_get function to get random numbers from + * the just freshly seeded DRNG. + */ + ret = lrng_drng_get(regular_drng, seedbuf, sizeof(seedbuf)); + + if (ret < 0) { + pr_warn("Error generating random numbers for atomic DRNG: %d\n", + ret); + } else { + unsigned long flags; + + spin_lock_irqsave(&lrng_drng_atomic.spin_lock, flags); + lrng_drng_inject(&lrng_drng_atomic, seedbuf, ret, + regular_drng->fully_seeded, "atomic"); + spin_unlock_irqrestore(&lrng_drng_atomic.spin_lock, flags); + } + memzero_explicit(&seedbuf, sizeof(seedbuf)); +} + +void lrng_drng_atomic_seed_es(void) +{ + struct entropy_buf seedbuf __aligned(LRNG_KCAPI_ALIGN); + struct lrng_drng *drng = &lrng_drng_atomic; + unsigned long flags; + u32 requested_bits = LRNG_MIN_SEED_ENTROPY_BITS; + + if (lrng_drng_atomic.fully_seeded) + return; + + /* + * When the LRNG reached the minimally seeded level, leave 256 bits of + * entropy for the regular DRNG. In addition ensure that additional + * 256 bits are available for the atomic DRNG to get to the fully + * seeded stage of the LRNG. + */ + if (lrng_state_min_seeded()) { + u32 avail_bits = lrng_avail_entropy(); + + requested_bits = + (avail_bits >= 2 * LRNG_FULL_SEED_ENTROPY_BITS) ? + LRNG_FULL_SEED_ENTROPY_BITS : 0; + + if (!requested_bits) + return; + } + + pr_debug("atomic DRNG seeding attempt to pull %u bits of entropy directly from entropy sources\n", + requested_bits); + + lrng_fill_seed_buffer(&seedbuf, requested_bits); + spin_lock_irqsave(&drng->spin_lock, flags); + lrng_drng_inject(&lrng_drng_atomic, (u8 *)&seedbuf, sizeof(seedbuf), + lrng_fully_seeded_eb(drng->fully_seeded, &seedbuf), + "atomic"); + spin_unlock_irqrestore(&drng->spin_lock, flags); + lrng_init_ops(&seedbuf); + + /* + * Do not call lrng_init_ops(seedbuf) here as the atomic DRNG does not + * serve common users. 
+ */ + memzero_explicit(&seedbuf, sizeof(seedbuf)); +} + +static void lrng_drng_atomic_get(u8 *outbuf, u32 outbuflen) +{ + struct lrng_drng *drng = &lrng_drng_atomic; + unsigned long flags; + + if (!outbuf || !outbuflen) + return; + + outbuflen = min_t(size_t, outbuflen, INT_MAX); + + while (outbuflen) { + u32 todo = min_t(u32, outbuflen, LRNG_DRNG_MAX_REQSIZE); + int ret; + + atomic_dec(&drng->requests); + + spin_lock_irqsave(&drng->spin_lock, flags); + ret = drng->drng_cb->drng_generate(drng->drng, outbuf, todo); + spin_unlock_irqrestore(&drng->spin_lock, flags); + if (ret <= 0) { + pr_warn("getting random data from DRNG failed (%d)\n", + ret); + return; + } + outbuflen -= ret; + outbuf += ret; + } +} + +void lrng_get_random_bytes(void *buf, int nbytes) +{ + lrng_drng_atomic_get((u8 *)buf, (u32)nbytes); + lrng_debug_report_seedlevel("lrng_get_random_bytes"); +} +EXPORT_SYMBOL(lrng_get_random_bytes); diff --git a/drivers/char/lrng/lrng_drng_atomic.h b/drivers/char/lrng/lrng_drng_atomic.h new file mode 100644 index 0000000000000..d5d24fc94f2e2 --- /dev/null +++ b/drivers/char/lrng/lrng_drng_atomic.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_DRNG_ATOMIC_H +#define _LRNG_DRNG_ATOMIC_H + +#include "lrng_drng_mgr.h" + +#ifdef CONFIG_LRNG_DRNG_ATOMIC +void lrng_drng_atomic_reset(void); +void lrng_drng_atomic_seed_drng(struct lrng_drng *drng); +void lrng_drng_atomic_seed_es(void); +void lrng_drng_atomic_force_reseed(void); +#else /* CONFIG_LRNG_DRNG_ATOMIC */ +static inline void lrng_drng_atomic_reset(void) { } +static inline void lrng_drng_atomic_seed_drng(struct lrng_drng *drng) { } +static inline void lrng_drng_atomic_seed_es(void) { } +static inline void lrng_drng_atomic_force_reseed(void) { } +#endif /* CONFIG_LRNG_DRNG_ATOMIC */ + +#endif /* _LRNG_DRNG_ATOMIC_H */ diff --git a/drivers/char/lrng/lrng_drng_chacha20.c b/drivers/char/lrng/lrng_drng_chacha20.c new file mode 100644 index 0000000000000..31be102e30079 --- /dev/null +++ b/drivers/char/lrng/lrng_drng_chacha20.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using + * ChaCha20 cipher implementations. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_drng_chacha20.h" + +/******************************* ChaCha20 DRNG *******************************/ + +#define CHACHA_BLOCK_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32)) + +/* + * Update of the ChaCha20 state by either using an unused buffer part or by + * generating one ChaCha20 block which is half of the state of the ChaCha20. + * The block is XORed into the key part of the state. This shall ensure + * backtracking resistance as well as a proper mix of the ChaCha20 state once + * the key is injected. 
+ */ +static void lrng_chacha20_update(struct chacha20_state *chacha20_state, + __le32 *buf, u32 used_words) +{ + struct chacha20_block *chacha20 = &chacha20_state->block; + u32 i; + __le32 tmp[CHACHA_BLOCK_WORDS]; + + BUILD_BUG_ON(sizeof(struct chacha20_block) != CHACHA_BLOCK_SIZE); + BUILD_BUG_ON(CHACHA_BLOCK_SIZE != 2 * CHACHA_KEY_SIZE); + + if (used_words > CHACHA_KEY_SIZE_WORDS) { + chacha20_block(&chacha20->constants[0], (u8 *)tmp); + for (i = 0; i < CHACHA_KEY_SIZE_WORDS; i++) + chacha20->key.u[i] ^= le32_to_cpu(tmp[i]); + memzero_explicit(tmp, sizeof(tmp)); + } else { + for (i = 0; i < CHACHA_KEY_SIZE_WORDS; i++) + chacha20->key.u[i] ^= le32_to_cpu(buf[i + used_words]); + } + + /* Deterministic increment of nonce as required in RFC 7539 chapter 4 */ + chacha20->nonce[0]++; + if (chacha20->nonce[0] == 0) { + chacha20->nonce[1]++; + if (chacha20->nonce[1] == 0) + chacha20->nonce[2]++; + } + + /* Leave counter untouched as it is start value is undefined in RFC */ +} + +/* + * Seed the ChaCha20 DRNG by injecting the input data into the key part of + * the ChaCha20 state. If the input data is longer than the ChaCha20 key size, + * perform a ChaCha20 operation after processing of key size input data. + * This operation shall spread out the entropy into the ChaCha20 state before + * new entropy is injected into the key part. + */ +static int lrng_cc20_drng_seed_helper(void *drng, const u8 *inbuf, u32 inbuflen) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + struct chacha20_block *chacha20 = &chacha20_state->block; + + while (inbuflen) { + u32 i, todo = min_t(u32, inbuflen, CHACHA_KEY_SIZE); + + for (i = 0; i < todo; i++) + chacha20->key.b[i] ^= inbuf[i]; + + /* Break potential dependencies between the inbuf key blocks */ + lrng_chacha20_update(chacha20_state, NULL, + CHACHA_BLOCK_WORDS); + inbuf += todo; + inbuflen -= todo; + } + + return 0; +} + +/* + * Chacha20 DRNG generation of random numbers: the stream output of ChaCha20 + * is the random number. After the completion of the generation of the + * stream, the entire ChaCha20 state is updated. + * + * Note, as the ChaCha20 implements a 32 bit counter, we must ensure + * that this function is only invoked for at most 2^32 - 1 ChaCha20 blocks + * before a reseed or an update happens. This is ensured by the variable + * outbuflen which is a 32 bit integer defining the number of bytes to be + * generated by the ChaCha20 DRNG. At the end of this function, an update + * operation is invoked which implies that the 32 bit counter will never be + * overflown in this implementation. 
+ */ +static int lrng_cc20_drng_generate_helper(void *drng, u8 *outbuf, u32 outbuflen) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + struct chacha20_block *chacha20 = &chacha20_state->block; + __le32 aligned_buf[CHACHA_BLOCK_WORDS]; + u32 ret = outbuflen, used = CHACHA_BLOCK_WORDS; + int zeroize_buf = 0; + + while (outbuflen >= CHACHA_BLOCK_SIZE) { + chacha20_block(&chacha20->constants[0], outbuf); + outbuf += CHACHA_BLOCK_SIZE; + outbuflen -= CHACHA_BLOCK_SIZE; + } + + if (outbuflen) { + chacha20_block(&chacha20->constants[0], (u8 *)aligned_buf); + memcpy(outbuf, aligned_buf, outbuflen); + used = ((outbuflen + sizeof(aligned_buf[0]) - 1) / + sizeof(aligned_buf[0])); + zeroize_buf = 1; + } + + lrng_chacha20_update(chacha20_state, aligned_buf, used); + + if (zeroize_buf) + memzero_explicit(aligned_buf, sizeof(aligned_buf)); + + return ret; +} + +/* + * Allocation of the DRNG state + */ +static void *lrng_cc20_drng_alloc(u32 sec_strength) +{ + struct chacha20_state *state = NULL; + + if (sec_strength > CHACHA_KEY_SIZE) { + pr_err("Security strength of ChaCha20 DRNG (%u bits) lower than requested by LRNG (%u bits)\n", + CHACHA_KEY_SIZE * 8, sec_strength * 8); + return ERR_PTR(-EINVAL); + } + if (sec_strength < CHACHA_KEY_SIZE) + pr_warn("Security strength of ChaCha20 DRNG (%u bits) higher than requested by LRNG (%u bits)\n", + CHACHA_KEY_SIZE * 8, sec_strength * 8); + + state = kmalloc(sizeof(struct chacha20_state), GFP_KERNEL); + if (!state) + return ERR_PTR(-ENOMEM); + pr_debug("memory for ChaCha20 core allocated\n"); + + lrng_cc20_init_rfc7539(&state->block); + + return state; +} + +static void lrng_cc20_drng_dealloc(void *drng) +{ + struct chacha20_state *chacha20_state = (struct chacha20_state *)drng; + + pr_debug("ChaCha20 core zeroized and freed\n"); + kfree_sensitive(chacha20_state); +} + +static const char *lrng_cc20_drng_name(void) +{ + return "ChaCha20 DRNG"; +} + +const struct lrng_drng_cb lrng_cc20_drng_cb = { + .drng_name = lrng_cc20_drng_name, + .drng_alloc = lrng_cc20_drng_alloc, + .drng_dealloc = lrng_cc20_drng_dealloc, + .drng_seed = lrng_cc20_drng_seed_helper, + .drng_generate = lrng_cc20_drng_generate_helper, +}; + +#if !defined(CONFIG_LRNG_DFLT_DRNG_CHACHA20) && \ + !defined(CONFIG_LRNG_DRNG_ATOMIC) +static int __init lrng_cc20_drng_init(void) +{ + return lrng_set_drng_cb(&lrng_cc20_drng_cb); +} + +static void __exit lrng_cc20_drng_exit(void) +{ + lrng_set_drng_cb(NULL); +} + +late_initcall(lrng_cc20_drng_init); +module_exit(lrng_cc20_drng_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Entropy Source and DRNG Manager - ChaCha20-based DRNG backend"); +#endif /* CONFIG_LRNG_DFLT_DRNG_CHACHA20 */ diff --git a/drivers/char/lrng/lrng_drng_chacha20.h b/drivers/char/lrng/lrng_drng_chacha20.h new file mode 100644 index 0000000000000..fee6571281b64 --- /dev/null +++ b/drivers/char/lrng/lrng_drng_chacha20.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG ChaCha20 definitions + * + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_CHACHA20_H +#define _LRNG_CHACHA20_H + +#include + +/* State according to RFC 7539 section 2.3 */ +struct chacha20_block { + u32 constants[4]; + union { +#define CHACHA_KEY_SIZE_WORDS (CHACHA_KEY_SIZE / sizeof(u32)) + u32 u[CHACHA_KEY_SIZE_WORDS]; + u8 b[CHACHA_KEY_SIZE]; + } key; + u32 counter; + u32 nonce[3]; +}; + +struct chacha20_state { + struct chacha20_block block; +}; + +static inline void lrng_cc20_init_rfc7539(struct 
chacha20_block *chacha20) +{ + chacha_init_consts(chacha20->constants); +} + +#define LRNG_CC20_INIT_RFC7539(x) \ + x.constants[0] = 0x61707865, \ + x.constants[1] = 0x3320646e, \ + x.constants[2] = 0x79622d32, \ + x.constants[3] = 0x6b206574, + +extern const struct lrng_drng_cb lrng_cc20_drng_cb; + +#endif /* _LRNG_CHACHA20_H */ diff --git a/drivers/char/lrng/lrng_drng_drbg.c b/drivers/char/lrng/lrng_drng_drbg.c new file mode 100644 index 0000000000000..1feec2c095a5e --- /dev/null +++ b/drivers/char/lrng/lrng_drng_drbg.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using the + * kernel crypto API and its DRBG. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_drng_drbg.h" + +/* + * Define a DRBG plus a hash / MAC used to extract data from the entropy pool. + * For LRNG_HASH_NAME you can use a hash or a MAC (HMAC or CMAC) of your choice + * (Note, you should use the suggested selections below -- using SHA-1 or MD5 + * is not wise). The idea is that the used cipher primitive can be selected to + * be the same as used for the DRBG. I.e. the LRNG only uses one cipher + * primitive using the same cipher implementation with the options offered in + * the following. This means, if the CTR DRBG is selected and AES-NI is present, + * both the CTR DRBG and the selected cmac(aes) use AES-NI. + * + * The security strengths of the DRBGs are all 256 bits according to + * SP800-57 section 5.6.1. + * + * This definition is allowed to be changed. + */ +#ifdef CONFIG_CRYPTO_DRBG_CTR +static unsigned int lrng_drbg_type = 0; +#elif defined CONFIG_CRYPTO_DRBG_HMAC +static unsigned int lrng_drbg_type = 1; +#elif defined CONFIG_CRYPTO_DRBG_HASH +static unsigned int lrng_drbg_type = 2; +#else +#error "Unknown DRBG in use" +#endif + +/* The parameter must be r/o in sysfs as otherwise races appear. 
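Each backend in this patch fills in the same lrng_drng_cb contract. A minimal sketch of how such a callback set can be driven (illustrative only, not part of the patch; the 32-byte argument mirrors the 256-bit security strength noted above, and seed/generate error handling is trimmed):

static int example_use_drng_cb(const struct lrng_drng_cb *cb,
			       const u8 *seed, u32 seedlen)
{
	u8 out[64];
	void *state = cb->drng_alloc(32);	/* security strength in bytes */

	if (IS_ERR(state))
		return PTR_ERR(state);

	cb->drng_seed(state, seed, seedlen);		/* inject seed material */
	cb->drng_generate(state, out, sizeof(out));	/* pull random bytes */
	cb->drng_dealloc(state);
	return 0;
}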
*/ +module_param(lrng_drbg_type, uint, 0444); +MODULE_PARM_DESC(lrng_drbg_type, "DRBG type used for LRNG (0->CTR_DRBG, 1->HMAC_DRBG, 2->Hash_DRBG)"); + +struct lrng_drbg { + const char *hash_name; + const char *drbg_core; +}; + +static const struct lrng_drbg lrng_drbg_types[] = { + { /* CTR_DRBG with AES-256 using derivation function */ + .drbg_core = "drbg_nopr_ctr_aes256", + }, { /* HMAC_DRBG with SHA-512 */ + .drbg_core = "drbg_nopr_hmac_sha512", + }, { /* Hash_DRBG with SHA-512 using derivation function */ + .drbg_core = "drbg_nopr_sha512" + } +}; + +static int lrng_drbg_drng_seed_helper(void *drng, const u8 *inbuf, u32 inbuflen) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + LIST_HEAD(seedlist); + struct drbg_string data; + int ret; + + drbg_string_fill(&data, inbuf, inbuflen); + list_add_tail(&data.list, &seedlist); + ret = drbg->d_ops->update(drbg, &seedlist, drbg->seeded); + + if (ret >= 0) + drbg->seeded = true; + + return ret; +} + +static int lrng_drbg_drng_generate_helper(void *drng, u8 *outbuf, u32 outbuflen) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + + return drbg->d_ops->generate(drbg, outbuf, outbuflen, NULL); +} + +static void *lrng_drbg_drng_alloc(u32 sec_strength) +{ + struct drbg_state *drbg; + int coreref = -1; + bool pr = false; + int ret; + + drbg_convert_tfm_core(lrng_drbg_types[lrng_drbg_type].drbg_core, + &coreref, &pr); + if (coreref < 0) + return ERR_PTR(-EFAULT); + + drbg = kzalloc(sizeof(struct drbg_state), GFP_KERNEL); + if (!drbg) + return ERR_PTR(-ENOMEM); + + drbg->core = &drbg_cores[coreref]; + drbg->seeded = false; + ret = drbg_alloc_state(drbg); + if (ret) + goto err; + + if (sec_strength > drbg_sec_strength(drbg->core->flags)) { + pr_err("Security strength of DRBG (%u bits) lower than requested by LRNG (%u bits)\n", + drbg_sec_strength(drbg->core->flags) * 8, + sec_strength * 8); + goto dealloc; + } + + if (sec_strength < drbg_sec_strength(drbg->core->flags)) + pr_warn("Security strength of DRBG (%u bits) higher than requested by LRNG (%u bits)\n", + drbg_sec_strength(drbg->core->flags) * 8, + sec_strength * 8); + + pr_info("DRBG with %s core allocated\n", drbg->core->backend_cra_name); + + return drbg; + +dealloc: + if (drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); + drbg_dealloc_state(drbg); +err: + kfree(drbg); + return ERR_PTR(-EINVAL); +} + +static void lrng_drbg_drng_dealloc(void *drng) +{ + struct drbg_state *drbg = (struct drbg_state *)drng; + + if (drbg && drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); + drbg_dealloc_state(drbg); + kfree_sensitive(drbg); + pr_info("DRBG deallocated\n"); +} + +static const char *lrng_drbg_name(void) +{ + return lrng_drbg_types[lrng_drbg_type].drbg_core; +} + +const struct lrng_drng_cb lrng_drbg_cb = { + .drng_name = lrng_drbg_name, + .drng_alloc = lrng_drbg_drng_alloc, + .drng_dealloc = lrng_drbg_drng_dealloc, + .drng_seed = lrng_drbg_drng_seed_helper, + .drng_generate = lrng_drbg_drng_generate_helper, +}; + +#ifndef CONFIG_LRNG_DFLT_DRNG_DRBG +static int __init lrng_drbg_init(void) +{ + if (lrng_drbg_type >= ARRAY_SIZE(lrng_drbg_types)) { + pr_err("lrng_drbg_type parameter too large (given %u - max: %lu)", + lrng_drbg_type, + (unsigned long)ARRAY_SIZE(lrng_drbg_types) - 1); + return -EAGAIN; + } + return lrng_set_drng_cb(&lrng_drbg_cb); +} + +static void __exit lrng_drbg_exit(void) +{ + lrng_set_drng_cb(NULL); +} + +late_initcall(lrng_drbg_init); +module_exit(lrng_drbg_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Entropy Source 
and DRNG Manager - SP800-90A DRBG backend"); +#endif /* CONFIG_LRNG_DFLT_DRNG_DRBG */ diff --git a/drivers/char/lrng/lrng_drng_drbg.h b/drivers/char/lrng/lrng_drng_drbg.h new file mode 100644 index 0000000000000..b3d556caf294e --- /dev/null +++ b/drivers/char/lrng/lrng_drng_drbg.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG SP800-90A definitions + * + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_DRBG_H +#define _LRNG_DRBG_H + +extern const struct lrng_drng_cb lrng_drbg_cb; + +#endif /* _LRNG_DRBG_H */ diff --git a/drivers/char/lrng/lrng_drng_kcapi.c b/drivers/char/lrng/lrng_drng_kcapi.c new file mode 100644 index 0000000000000..a204bcf52a9ac --- /dev/null +++ b/drivers/char/lrng/lrng_drng_kcapi.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the cryptographic primitives using the + * kernel crypto API. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "lrng_drng_kcapi.h" + +static char *drng_name = NULL; +module_param(drng_name, charp, 0444); +MODULE_PARM_DESC(drng_name, "Kernel crypto API name of DRNG"); + +static char *seed_hash = NULL; +module_param(seed_hash, charp, 0444); +MODULE_PARM_DESC(seed_hash, + "Kernel crypto API name of hash with output size equal to seedsize of DRNG to bring seed string to the size required by the DRNG"); + +struct lrng_drng_info { + struct crypto_rng *kcapi_rng; + struct crypto_shash *hash_tfm; +}; + +static int lrng_kcapi_drng_seed_helper(void *drng, const u8 *inbuf, + u32 inbuflen) +{ + struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng; + struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng; + struct crypto_shash *hash_tfm = lrng_drng_info->hash_tfm; + SHASH_DESC_ON_STACK(shash, hash_tfm); + u32 digestsize; + u8 digest[HASH_MAX_DIGESTSIZE] __aligned(8); + int ret; + + if (!hash_tfm) + return crypto_rng_reset(kcapi_rng, inbuf, inbuflen); + + shash->tfm = hash_tfm; + digestsize = crypto_shash_digestsize(hash_tfm); + + ret = crypto_shash_digest(shash, inbuf, inbuflen, digest); + shash_desc_zero(shash); + if (ret) + return ret; + + ret = crypto_rng_reset(kcapi_rng, digest, digestsize); + if (ret) + return ret; + + memzero_explicit(digest, digestsize); + return 0; +} + +static int lrng_kcapi_drng_generate_helper(void *drng, u8 *outbuf, + u32 outbuflen) +{ + struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng; + struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng; + int ret = crypto_rng_get_bytes(kcapi_rng, outbuf, outbuflen); + + if (ret < 0) + return ret; + + return outbuflen; +} + +static void *lrng_kcapi_drng_alloc(u32 sec_strength) +{ + struct lrng_drng_info *lrng_drng_info; + struct crypto_rng *kcapi_rng; + u32 time = random_get_entropy(); + int seedsize, rv; + void *ret = ERR_PTR(-ENOMEM); + + if (!drng_name) { + pr_err("DRNG name missing\n"); + return ERR_PTR(-EINVAL); + } + + if (!memcmp(drng_name, "stdrng", 6) || + !memcmp(drng_name, "lrng", 4) || + !memcmp(drng_name, "drbg", 4) || + !memcmp(drng_name, "jitterentropy_rng", 17)) { + pr_err("Refusing to load the requested random number generator\n"); + return ERR_PTR(-EINVAL); + } + + lrng_drng_info = kzalloc(sizeof(*lrng_drng_info), GFP_KERNEL); + if (!lrng_drng_info) + return ERR_PTR(-ENOMEM); + + kcapi_rng = crypto_alloc_rng(drng_name, 0, 0); + if (IS_ERR(kcapi_rng)) { + pr_err("DRNG %s cannot be allocated\n", drng_name); + ret 
= ERR_CAST(kcapi_rng); + goto free; + } + + lrng_drng_info->kcapi_rng = kcapi_rng; + + seedsize = crypto_rng_seedsize(kcapi_rng); + if (seedsize) { + struct crypto_shash *hash_tfm; + + if (!seed_hash) { + switch (seedsize) { + case 32: + seed_hash = "sha256"; + break; + case 48: + seed_hash = "sha384"; + break; + case 64: + seed_hash = "sha512"; + break; + default: + pr_err("Seed size %d cannot be processed\n", + seedsize); + goto dealloc; + } + } + + hash_tfm = crypto_alloc_shash(seed_hash, 0, 0); + if (IS_ERR(hash_tfm)) { + ret = ERR_CAST(hash_tfm); + goto dealloc; + } + + if (seedsize != crypto_shash_digestsize(hash_tfm)) { + pr_err("Seed hash output size not equal to DRNG seed size\n"); + crypto_free_shash(hash_tfm); + ret = ERR_PTR(-EINVAL); + goto dealloc; + } + + lrng_drng_info->hash_tfm = hash_tfm; + + pr_info("Seed hash %s allocated\n", seed_hash); + } + + rv = lrng_kcapi_drng_seed_helper(lrng_drng_info, (u8 *)(&time), + sizeof(time)); + if (rv) { + ret = ERR_PTR(rv); + goto dealloc; + } + + pr_info("Kernel crypto API DRNG %s allocated\n", drng_name); + + return lrng_drng_info; + +dealloc: + crypto_free_rng(kcapi_rng); +free: + kfree(lrng_drng_info); + return ret; +} + +static void lrng_kcapi_drng_dealloc(void *drng) +{ + struct lrng_drng_info *lrng_drng_info = (struct lrng_drng_info *)drng; + struct crypto_rng *kcapi_rng = lrng_drng_info->kcapi_rng; + + crypto_free_rng(kcapi_rng); + if (lrng_drng_info->hash_tfm) + crypto_free_shash(lrng_drng_info->hash_tfm); + kfree(lrng_drng_info); + pr_info("DRNG %s deallocated\n", drng_name); +} + +static const char *lrng_kcapi_drng_name(void) +{ + return drng_name; +} + +const struct lrng_drng_cb lrng_kcapi_drng_cb = { + .drng_name = lrng_kcapi_drng_name, + .drng_alloc = lrng_kcapi_drng_alloc, + .drng_dealloc = lrng_kcapi_drng_dealloc, + .drng_seed = lrng_kcapi_drng_seed_helper, + .drng_generate = lrng_kcapi_drng_generate_helper, +}; + +#ifndef CONFIG_LRNG_DFLT_DRNG_KCAPI +static int __init lrng_kcapi_init(void) +{ + return lrng_set_drng_cb(&lrng_kcapi_drng_cb); +} +static void __exit lrng_kcapi_exit(void) +{ + lrng_set_drng_cb(NULL); +} + +late_initcall(lrng_kcapi_init); +module_exit(lrng_kcapi_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Entropy Source and DRNG Manager - kernel crypto API DRNG backend"); +#endif /* CONFIG_LRNG_DFLT_DRNG_KCAPI */ diff --git a/drivers/char/lrng/lrng_drng_kcapi.h b/drivers/char/lrng/lrng_drng_kcapi.h new file mode 100644 index 0000000000000..5db25aaf830c5 --- /dev/null +++ b/drivers/char/lrng/lrng_drng_kcapi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG kernel crypto API DRNG definition + * + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_KCAPI_DRNG_H +#define _LRNG_KCAPI_DRNG_H + +extern const struct lrng_drng_cb lrng_kcapi_drng_cb; + +#endif /* _LRNG_KCAPI_DRNG_H */ diff --git a/drivers/char/lrng/lrng_drng_mgr.c b/drivers/char/lrng/lrng_drng_mgr.c new file mode 100644 index 0000000000000..4c31cbfc79b68 --- /dev/null +++ b/drivers/char/lrng/lrng_drng_mgr.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG DRNG management + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "lrng_drng_atomic.h" +#include "lrng_drng_chacha20.h" +#include "lrng_drng_drbg.h" +#include "lrng_drng_kcapi.h" +#include "lrng_drng_mgr.h" +#include "lrng_es_aux.h" +#include "lrng_es_mgr.h" 
+#include "lrng_interface_random_kernel.h" +#include "lrng_numa.h" +#include "lrng_sha.h" + +/* + * Maximum number of seconds between DRNG reseed intervals of the DRNG. Note, + * this is enforced with the next request of random numbers from the + * DRNG. Setting this value to zero implies a reseeding attempt before every + * generated random number. + */ +int lrng_drng_reseed_max_time = 600; + +/* + * Is LRNG for general-purpose use (i.e. is at least the lrng_drng_init + * fully allocated)? + */ +static atomic_t lrng_avail = ATOMIC_INIT(0); + +/* Guard protecting all crypto callback update operation of all DRNGs. */ +DEFINE_MUTEX(lrng_crypto_cb_update); + +/* + * Default hash callback that provides the crypto primitive right from the + * kernel start. It must not perform any memory allocation operation, but + * simply perform the hash calculation. + */ +const struct lrng_hash_cb *lrng_default_hash_cb = &lrng_sha_hash_cb; + +/* + * Default DRNG callback that provides the crypto primitive which is + * allocated either during late kernel boot stage. So, it is permissible for + * the callback to perform memory allocation operations. + */ +const struct lrng_drng_cb *lrng_default_drng_cb = +#if defined(CONFIG_LRNG_DFLT_DRNG_CHACHA20) + &lrng_cc20_drng_cb; +#elif defined(CONFIG_LRNG_DFLT_DRNG_DRBG) + &lrng_drbg_cb; +#elif defined(CONFIG_LRNG_DFLT_DRNG_KCAPI) + &lrng_kcapi_drng_cb; +#else +#error "Unknown default DRNG selected" +#endif + +/* DRNG for non-atomic use cases */ +static struct lrng_drng lrng_drng_init = { + LRNG_DRNG_STATE_INIT(lrng_drng_init, NULL, NULL, NULL, + &lrng_sha_hash_cb), + .lock = __MUTEX_INITIALIZER(lrng_drng_init.lock), +}; + +static u32 max_wo_reseed = LRNG_DRNG_MAX_WITHOUT_RESEED; +#ifdef CONFIG_LRNG_RUNTIME_MAX_WO_RESEED_CONFIG +module_param(max_wo_reseed, uint, 0444); +MODULE_PARM_DESC(max_wo_reseed, + "Maximum number of DRNG generate operation without full reseed\n"); +#endif + +/* Wait queue to wait until the LRNG is initialized - can freely be used */ +DECLARE_WAIT_QUEUE_HEAD(lrng_init_wait); + +/********************************** Helper ************************************/ + +bool lrng_get_available(void) +{ + return likely(atomic_read(&lrng_avail)); +} + +struct lrng_drng *lrng_drng_init_instance(void) +{ + return &lrng_drng_init; +} + +struct lrng_drng *lrng_drng_node_instance(void) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + int node = numa_node_id(); + + if (lrng_drng && lrng_drng[node]) + return lrng_drng[node]; + + return lrng_drng_init_instance(); +} + +void lrng_drng_reset(struct lrng_drng *drng) +{ + atomic_set(&drng->requests, LRNG_DRNG_RESEED_THRESH); + atomic_set(&drng->requests_since_fully_seeded, 0); + drng->last_seeded = jiffies; + drng->fully_seeded = false; + drng->force_reseed = true; + pr_debug("reset DRNG\n"); +} + +/* Initialize the DRNG, except the mutex lock */ +int lrng_drng_alloc_common(struct lrng_drng *drng, + const struct lrng_drng_cb *drng_cb) +{ + if (!drng || !drng_cb) + return -EINVAL; + + drng->drng_cb = drng_cb; + drng->drng = drng_cb->drng_alloc(LRNG_DRNG_SECURITY_STRENGTH_BYTES); + if (IS_ERR(drng->drng)) + return PTR_ERR(drng->drng); + + lrng_drng_reset(drng); + return 0; +} + +/* Initialize the default DRNG during boot and perform its seeding */ +int lrng_drng_initalize(void) +{ + int ret; + + if (lrng_get_available()) + return 0; + + /* Catch programming error */ + WARN_ON(lrng_drng_init.hash_cb != lrng_default_hash_cb); + + mutex_lock(&lrng_drng_init.lock); + if (lrng_get_available()) { + 
mutex_unlock(&lrng_drng_init.lock); + return 0; + } + + ret = lrng_drng_alloc_common(&lrng_drng_init, lrng_default_drng_cb); + mutex_unlock(&lrng_drng_init.lock); + if (ret) + return ret; + + pr_debug("LRNG for general use is available\n"); + atomic_set(&lrng_avail, 1); + + /* Seed the DRNG with any entropy available */ + if (!lrng_pool_trylock()) { + pr_info("Initial DRNG initialized triggering first seeding\n"); + lrng_drng_seed_work(NULL); + } else { + pr_info("Initial DRNG initialized without seeding\n"); + } + + return 0; +} + +static int __init lrng_drng_make_available(void) +{ + return lrng_drng_initalize(); +} +late_initcall(lrng_drng_make_available); + +bool lrng_sp80090c_compliant(void) +{ + /* SP800-90C compliant oversampling is only requested in FIPS mode */ + return fips_enabled; +} + +/************************* Random Number Generation ***************************/ + +/* Inject a data buffer into the DRNG - caller must hold its lock */ +void lrng_drng_inject(struct lrng_drng *drng, const u8 *inbuf, u32 inbuflen, + bool fully_seeded, const char *drng_type) +{ + BUILD_BUG_ON(LRNG_DRNG_RESEED_THRESH > INT_MAX); + pr_debug("seeding %s DRNG with %u bytes\n", drng_type, inbuflen); + if (drng->drng_cb->drng_seed(drng->drng, inbuf, inbuflen) < 0) { + pr_warn("seeding of %s DRNG failed\n", drng_type); + drng->force_reseed = true; + } else { + int gc = LRNG_DRNG_RESEED_THRESH - atomic_read(&drng->requests); + + pr_debug("%s DRNG stats since last seeding: %lu secs; generate calls: %d\n", + drng_type, + (time_after(jiffies, drng->last_seeded) ? + (jiffies - drng->last_seeded) : 0) / HZ, gc); + + /* Count the numbers of generate ops since last fully seeded */ + if (fully_seeded) + atomic_set(&drng->requests_since_fully_seeded, 0); + else + atomic_add(gc, &drng->requests_since_fully_seeded); + + drng->last_seeded = jiffies; + atomic_set(&drng->requests, LRNG_DRNG_RESEED_THRESH); + drng->force_reseed = false; + + if (!drng->fully_seeded) { + drng->fully_seeded = fully_seeded; + if (drng->fully_seeded) + pr_debug("%s DRNG fully seeded\n", drng_type); + } + } +} + +/* Perform the seeding of the DRNG with data from entropy source */ +static void lrng_drng_seed_es(struct lrng_drng *drng) +{ + struct entropy_buf seedbuf __aligned(LRNG_KCAPI_ALIGN); + + lrng_fill_seed_buffer(&seedbuf, + lrng_get_seed_entropy_osr(drng->fully_seeded)); + + mutex_lock(&drng->lock); + lrng_drng_inject(drng, (u8 *)&seedbuf, sizeof(seedbuf), + lrng_fully_seeded_eb(drng->fully_seeded, &seedbuf), + "regular"); + mutex_unlock(&drng->lock); + + /* Set the seeding state of the LRNG */ + lrng_init_ops(&seedbuf); + + memzero_explicit(&seedbuf, sizeof(seedbuf)); +} + +static void lrng_drng_seed(struct lrng_drng *drng) +{ + BUILD_BUG_ON(LRNG_MIN_SEED_ENTROPY_BITS > + LRNG_DRNG_SECURITY_STRENGTH_BITS); + + if (lrng_get_available()) { + /* (Re-)Seed DRNG */ + lrng_drng_seed_es(drng); + /* (Re-)Seed atomic DRNG from regular DRNG */ + lrng_drng_atomic_seed_drng(drng); + } else { + /* + * If no-one is waiting for the DRNG, seed the atomic DRNG + * directly from the entropy sources. 
+ */ + if (!wq_has_sleeper(&lrng_init_wait) && + !lrng_ready_chain_has_sleeper()) + lrng_drng_atomic_seed_es(); + else + lrng_init_ops(NULL); + } +} + +static void _lrng_drng_seed_work(struct lrng_drng *drng, u32 node) +{ + pr_debug("reseed triggered by system events for DRNG on NUMA node %d\n", + node); + lrng_drng_seed(drng); + if (drng->fully_seeded) { + /* Prevent reseed storm */ + drng->last_seeded += node * 100 * HZ; + } +} + +/* + * DRNG reseed trigger: Kernel thread handler triggered by the schedule_work() + */ +void lrng_drng_seed_work(struct work_struct *dummy) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + u32 node; + + if (lrng_drng) { + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (drng && !drng->fully_seeded) { + _lrng_drng_seed_work(drng, node); + goto out; + } + } + } else { + if (!lrng_drng_init.fully_seeded) { + _lrng_drng_seed_work(&lrng_drng_init, 0); + goto out; + } + } + + lrng_pool_all_numa_nodes_seeded(true); + +out: + /* Allow the seeding operation to be called again */ + lrng_pool_unlock(); +} + +/* Force all DRNGs to reseed before next generation */ +void lrng_drng_force_reseed(void) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + u32 node; + + /* + * If the initial DRNG is over the reseed threshold, allow a forced + * reseed only for the initial DRNG as this is the fallback for all. It + * must be kept seeded before all others to keep the LRNG operational. + */ + if (!lrng_drng || + (atomic_read_u32(&lrng_drng_init.requests_since_fully_seeded) > + LRNG_DRNG_RESEED_THRESH)) { + lrng_drng_init.force_reseed = lrng_drng_init.fully_seeded; + pr_debug("force reseed of initial DRNG\n"); + return; + } + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (!drng) + continue; + + drng->force_reseed = drng->fully_seeded; + pr_debug("force reseed of DRNG on node %u\n", node); + } + lrng_drng_atomic_force_reseed(); +} +EXPORT_SYMBOL(lrng_drng_force_reseed); + +static bool lrng_drng_must_reseed(struct lrng_drng *drng) +{ + return (atomic_dec_and_test(&drng->requests) || + drng->force_reseed || + time_after(jiffies, + drng->last_seeded + lrng_drng_reseed_max_time * HZ)); +} + +/* + * lrng_drng_get() - Get random data out of the DRNG which is reseeded + * frequently. 
+ * + * @drng: DRNG instance + * @outbuf: buffer for storing random data + * @outbuflen: length of outbuf + * + * Return: + * * < 0 in error case (DRNG generation or update failed) + * * >=0 returning the returned number of bytes + */ +int lrng_drng_get(struct lrng_drng *drng, u8 *outbuf, u32 outbuflen) +{ + u32 processed = 0; + + if (!outbuf || !outbuflen) + return 0; + + if (!lrng_get_available()) + return -EOPNOTSUPP; + + outbuflen = min_t(size_t, outbuflen, INT_MAX); + + /* If DRNG operated without proper reseed for too long, block LRNG */ + BUILD_BUG_ON(LRNG_DRNG_MAX_WITHOUT_RESEED < LRNG_DRNG_RESEED_THRESH); + if (atomic_read_u32(&drng->requests_since_fully_seeded) > max_wo_reseed) + lrng_unset_fully_seeded(drng); + + while (outbuflen) { + u32 todo = min_t(u32, outbuflen, LRNG_DRNG_MAX_REQSIZE); + int ret; + + if (lrng_drng_must_reseed(drng)) { + if (lrng_pool_trylock()) { + drng->force_reseed = true; + } else { + lrng_drng_seed(drng); + lrng_pool_unlock(); + } + } + + mutex_lock(&drng->lock); + ret = drng->drng_cb->drng_generate(drng->drng, + outbuf + processed, todo); + mutex_unlock(&drng->lock); + if (ret <= 0) { + pr_warn("getting random data from DRNG failed (%d)\n", + ret); + return -EFAULT; + } + processed += ret; + outbuflen -= ret; + } + + return processed; +} + +int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = &lrng_drng_init; + int ret, node = numa_node_id(); + + might_sleep(); + + if (lrng_drng && lrng_drng[node] && lrng_drng[node]->fully_seeded) + drng = lrng_drng[node]; + + ret = lrng_drng_initalize(); + if (ret) + return ret; + + return lrng_drng_get(drng, outbuf, outbuflen); +} + +/* Reset LRNG such that all existing entropy is gone */ +static void _lrng_reset(struct work_struct *work) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + + if (!lrng_drng) { + mutex_lock(&lrng_drng_init.lock); + lrng_drng_reset(&lrng_drng_init); + mutex_unlock(&lrng_drng_init.lock); + } else { + u32 node; + + for_each_online_node(node) { + struct lrng_drng *drng = lrng_drng[node]; + + if (!drng) + continue; + mutex_lock(&drng->lock); + lrng_drng_reset(drng); + mutex_unlock(&drng->lock); + } + } + lrng_drng_atomic_reset(); + lrng_set_entropy_thresh(LRNG_INIT_ENTROPY_BITS); + + lrng_reset_state(); +} + +static DECLARE_WORK(lrng_reset_work, _lrng_reset); + +void lrng_reset(void) +{ + schedule_work(&lrng_reset_work); +} + +/******************* Generic LRNG kernel output interfaces ********************/ + +int lrng_drng_sleep_while_nonoperational(int nonblock) +{ + if (likely(lrng_state_operational())) + return 0; + if (nonblock) + return -EAGAIN; + return wait_event_interruptible(lrng_init_wait, + lrng_state_operational()); +} + +int lrng_drng_sleep_while_non_min_seeded(void) +{ + if (likely(lrng_state_min_seeded())) + return 0; + return wait_event_interruptible(lrng_init_wait, + lrng_state_min_seeded()); +} + +void lrng_get_random_bytes_full(void *buf, int nbytes) +{ + lrng_drng_sleep_while_nonoperational(0); + lrng_drng_get_sleep((u8 *)buf, (u32)nbytes); +} +EXPORT_SYMBOL(lrng_get_random_bytes_full); + +void lrng_get_random_bytes_min(void *buf, int nbytes) +{ + lrng_drng_sleep_while_non_min_seeded(); + lrng_drng_get_sleep((u8 *)buf, (u32)nbytes); +} +EXPORT_SYMBOL(lrng_get_random_bytes_min); diff --git a/drivers/char/lrng/lrng_drng_mgr.h b/drivers/char/lrng/lrng_drng_mgr.h new file mode 100644 index 0000000000000..34ca5913dbd20 --- /dev/null +++ b/drivers/char/lrng/lrng_drng_mgr.h @@ -0,0 
+1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_DRNG_H +#define _LRNG_DRNG_H + +#include +#include +#include + +#include "lrng_definitions.h" + +extern struct wait_queue_head lrng_init_wait; +extern int lrng_drng_reseed_max_time; +extern struct mutex lrng_crypto_cb_update; +extern const struct lrng_drng_cb *lrng_default_drng_cb; +extern const struct lrng_hash_cb *lrng_default_hash_cb; + +/* DRNG state handle */ +struct lrng_drng { + void *drng; /* DRNG handle */ + void *hash; /* Hash handle */ + const struct lrng_drng_cb *drng_cb; /* DRNG callbacks */ + const struct lrng_hash_cb *hash_cb; /* Hash callbacks */ + atomic_t requests; /* Number of DRNG requests */ + atomic_t requests_since_fully_seeded; /* Number DRNG requests since + * last fully seeded + */ + unsigned long last_seeded; /* Last time it was seeded */ + bool fully_seeded; /* Is DRNG fully seeded? */ + bool force_reseed; /* Force a reseed */ + + rwlock_t hash_lock; /* Lock hash_cb replacement */ + /* Lock write operations on DRNG state, DRNG replacement of drng_cb */ + struct mutex lock; /* Non-atomic DRNG operation */ + spinlock_t spin_lock; /* Atomic DRNG operation */ +}; + +#define LRNG_DRNG_STATE_INIT(x, d, h, d_cb, h_cb) \ + .drng = d, \ + .hash = h, \ + .drng_cb = d_cb, \ + .hash_cb = h_cb, \ + .requests = ATOMIC_INIT(LRNG_DRNG_RESEED_THRESH),\ + .requests_since_fully_seeded = ATOMIC_INIT(0), \ + .last_seeded = 0, \ + .fully_seeded = false, \ + .force_reseed = true, \ + .hash_lock = __RW_LOCK_UNLOCKED(x.hash_lock) + +struct lrng_drng *lrng_drng_init_instance(void); +struct lrng_drng *lrng_drng_node_instance(void); + +void lrng_reset(void); +int lrng_drng_alloc_common(struct lrng_drng *drng, + const struct lrng_drng_cb *crypto_cb); +int lrng_drng_initalize(void); +bool lrng_sp80090c_compliant(void); +bool lrng_get_available(void); +void lrng_drng_reset(struct lrng_drng *drng); +void lrng_drng_inject(struct lrng_drng *drng, const u8 *inbuf, u32 inbuflen, + bool fully_seeded, const char *drng_type); +int lrng_drng_get(struct lrng_drng *drng, u8 *outbuf, u32 outbuflen); +int lrng_drng_sleep_while_nonoperational(int nonblock); +int lrng_drng_sleep_while_non_min_seeded(void); +int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen); +void lrng_drng_seed_work(struct work_struct *dummy); +void lrng_drng_force_reseed(void); + +static inline u32 lrng_compress_osr(void) +{ + return lrng_sp80090c_compliant() ? LRNG_OVERSAMPLE_ES_BITS : 0; +} + +static inline u32 lrng_reduce_by_osr(u32 entropy_bits) +{ + u32 osr_bits = lrng_compress_osr(); + + return (entropy_bits >= osr_bits) ? (entropy_bits - osr_bits) : 0; +} + +#endif /* _LRNG_DRNG_H */ diff --git a/drivers/char/lrng/lrng_es_aux.c b/drivers/char/lrng/lrng_es_aux.c new file mode 100644 index 0000000000000..245bc829998be --- /dev/null +++ b/drivers/char/lrng/lrng_es_aux.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Auxiliary entropy pool + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_es_aux.h" +#include "lrng_es_mgr.h" +#include "lrng_sysctl.h" + +/* + * This is the auxiliary pool + * + * The aux pool array is aligned to 8 bytes to comfort the kernel crypto API + * cipher implementations of the hash functions used to read the pool: for some + * accelerated implementations, we need an alignment to avoid a realignment + * which involves memcpy(). 
The alignment to 8 bytes should satisfy all crypto + * implementations. + */ +struct lrng_pool { + u8 aux_pool[LRNG_POOL_SIZE]; /* Aux pool: digest state */ + atomic_t aux_entropy_bits; + atomic_t digestsize; /* Digest size of used hash */ + bool initialized; /* Aux pool initialized? */ + + /* Serialize read of entropy pool and update of aux pool */ + spinlock_t lock; +}; + +static struct lrng_pool lrng_pool __aligned(LRNG_KCAPI_ALIGN) = { + .aux_entropy_bits = ATOMIC_INIT(0), + .digestsize = ATOMIC_INIT(LRNG_ATOMIC_DIGEST_SIZE), + .initialized = false, + .lock = __SPIN_LOCK_UNLOCKED(lrng_pool.lock) +}; + +/********************************** Helper ***********************************/ + +/* Entropy in bits present in aux pool */ +static u32 lrng_aux_avail_entropy(u32 __unused) +{ + /* Cap available entropy with max entropy */ + u32 avail_bits = min_t(u32, lrng_get_digestsize(), + atomic_read_u32(&lrng_pool.aux_entropy_bits)); + + /* Consider oversampling rate due to aux pool conditioning */ + return lrng_reduce_by_osr(avail_bits); +} + +/* Set the digest size of the used hash in bytes */ +static void lrng_set_digestsize(u32 digestsize) +{ + struct lrng_pool *pool = &lrng_pool; + u32 ent_bits = atomic_xchg_relaxed(&pool->aux_entropy_bits, 0), + old_digestsize = lrng_get_digestsize(); + + atomic_set(&lrng_pool.digestsize, digestsize); + + /* + * Update the write wakeup threshold which must not be larger + * than the digest size of the current conditioning hash. + */ + digestsize = lrng_reduce_by_osr(digestsize << 3); + lrng_sysctl_update_max_write_thresh(digestsize); + lrng_write_wakeup_bits = digestsize; + + /* + * In case the new digest is larger than the old one, cap the available + * entropy to the old message digest used to process the existing data. + */ + ent_bits = min_t(u32, ent_bits, old_digestsize); + atomic_add(ent_bits, &pool->aux_entropy_bits); +} + +static int __init lrng_init_wakeup_bits(void) +{ + u32 digestsize = lrng_reduce_by_osr(lrng_get_digestsize()); + + lrng_sysctl_update_max_write_thresh(digestsize); + lrng_write_wakeup_bits = digestsize; + return 0; +} +core_initcall(lrng_init_wakeup_bits); + +/* Obtain the digest size provided by the used hash in bits */ +u32 lrng_get_digestsize(void) +{ + return atomic_read_u32(&lrng_pool.digestsize) << 3; +} + +/* Set entropy content in user-space controllable aux pool */ +void lrng_pool_set_entropy(u32 entropy_bits) +{ + atomic_set(&lrng_pool.aux_entropy_bits, entropy_bits); +} + +static void lrng_aux_reset(void) +{ + lrng_pool_set_entropy(0); +} + +/* + * Replace old with new hash for auxiliary pool handling + * + * Assumption: the caller must guarantee that the new_cb is available during the + * entire operation (e.g. it must hold the write lock against pointer updating). + */ +static int +lrng_aux_switch_hash(struct lrng_drng *drng, int __unused, + const struct lrng_hash_cb *new_cb, void *new_hash, + const struct lrng_hash_cb *old_cb) +{ + struct lrng_drng *init_drng = lrng_drng_init_instance(); + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + u8 digest[LRNG_MAX_DIGESTSIZE]; + int ret; + + if (!IS_ENABLED(CONFIG_LRNG_SWITCH)) + return -EOPNOTSUPP; + + if (unlikely(!pool->initialized)) + return 0; + + /* We only switch if the processed DRNG is the initial DRNG. */ + if (init_drng != drng) + return 0; + + /* Get the aux pool hash with old digest ... */ + ret = old_cb->hash_final(shash, digest) ?: + /* ... re-initialize the hash with the new digest ... 
*/ + new_cb->hash_init(shash, new_hash) ?: + /* + * ... feed the old hash into the new state. We may feed + * uninitialized memory into the new state, but this is + * considered no issue and even good as we have some more + * uncertainty here. + */ + new_cb->hash_update(shash, digest, sizeof(digest)); + if (!ret) { + lrng_set_digestsize(new_cb->hash_digestsize(new_hash)); + pr_debug("Re-initialize aux entropy pool with hash %s\n", + new_cb->hash_name()); + } + + memzero_explicit(digest, sizeof(digest)); + return ret; +} + +/* Insert data into auxiliary pool by using the hash update function. */ +static int +lrng_aux_pool_insert_locked(const u8 *inbuf, u32 inbuflen, u32 entropy_bits) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_hash_cb *hash_cb; + unsigned long flags; + void *hash; + int ret; + + entropy_bits = min_t(u32, entropy_bits, inbuflen << 3); + + read_lock_irqsave(&drng->hash_lock, flags); + hash_cb = drng->hash_cb; + hash = drng->hash; + + if (unlikely(!pool->initialized)) { + ret = hash_cb->hash_init(shash, hash); + if (ret) + goto out; + pool->initialized = true; + } + + ret = hash_cb->hash_update(shash, inbuf, inbuflen); + if (ret) + goto out; + + /* + * Cap the available entropy to the hash output size compliant to + * SP800-90B section 3.1.5.1 table 1. + */ + entropy_bits += atomic_read_u32(&pool->aux_entropy_bits); + atomic_set(&pool->aux_entropy_bits, + min_t(u32, entropy_bits, + hash_cb->hash_digestsize(hash) << 3)); + +out: + read_unlock_irqrestore(&drng->hash_lock, flags); + return ret; +} + +int lrng_pool_insert_aux(const u8 *inbuf, u32 inbuflen, u32 entropy_bits) +{ + struct lrng_pool *pool = &lrng_pool; + unsigned long flags; + int ret; + + spin_lock_irqsave(&pool->lock, flags); + ret = lrng_aux_pool_insert_locked(inbuf, inbuflen, entropy_bits); + spin_unlock_irqrestore(&pool->lock, flags); + + lrng_es_add_entropy(); + + return ret; +} +EXPORT_SYMBOL(lrng_pool_insert_aux); + +/************************* Get data from entropy pool *************************/ + +/* + * Get auxiliary entropy pool and its entropy content for seed buffer. + * Caller must hold lrng_pool.pool->lock. + * @outbuf: buffer to store data in with size requested_bits + * @requested_bits: Requested amount of entropy + * @return: amount of entropy in outbuf in bits. 
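Since lrng_pool_insert_aux() above is exported, in-kernel users can credit data to this pool. A minimal sketch (the 32-byte buffer and the 128-bit entropy claim are illustrative values, not taken from the patch):

static void example_feed_aux_pool(const u8 seed[32])
{
	/* Insert 32 bytes and claim 128 bits of entropy for them. */
	int ret = lrng_pool_insert_aux(seed, 32, 128);

	if (ret)
		pr_warn("aux pool injection failed: %d\n", ret);
}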
+ */ +static u32 lrng_aux_get_pool(u8 *outbuf, u32 requested_bits) +{ + struct lrng_pool *pool = &lrng_pool; + struct shash_desc *shash = (struct shash_desc *)pool->aux_pool; + struct lrng_drng *drng = lrng_drng_init_instance(); + const struct lrng_hash_cb *hash_cb; + unsigned long flags; + void *hash; + u32 collected_ent_bits, returned_ent_bits, unused_bits = 0, + digestsize, digestsize_bits, requested_bits_osr; + u8 aux_output[LRNG_MAX_DIGESTSIZE]; + + if (unlikely(!pool->initialized)) + return 0; + + read_lock_irqsave(&drng->hash_lock, flags); + + hash_cb = drng->hash_cb; + hash = drng->hash; + digestsize = hash_cb->hash_digestsize(hash); + digestsize_bits = digestsize << 3; + + /* Cap to maximum entropy that can ever be generated with given hash */ + lrng_cap_requested(digestsize_bits, requested_bits); + + /* Ensure that no more than the size of aux_pool can be requested */ + requested_bits = min_t(u32, requested_bits, (LRNG_MAX_DIGESTSIZE << 3)); + requested_bits_osr = requested_bits + lrng_compress_osr(); + + /* Cap entropy with entropy counter from aux pool and the used digest */ + collected_ent_bits = min_t(u32, digestsize_bits, + atomic_xchg_relaxed(&pool->aux_entropy_bits, 0)); + + /* We collected too much entropy and put the overflow back */ + if (collected_ent_bits > requested_bits_osr) { + /* Amount of bits we collected too much */ + unused_bits = collected_ent_bits - requested_bits_osr; + /* Put entropy back */ + atomic_add(unused_bits, &pool->aux_entropy_bits); + /* Fix collected entropy */ + collected_ent_bits = requested_bits_osr; + } + + /* Apply oversampling: discount requested oversampling rate */ + returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits); + + pr_debug("obtained %u bits by collecting %u bits of entropy from aux pool, %u bits of entropy remaining\n", + returned_ent_bits, collected_ent_bits, unused_bits); + + /* Get the digest for the aux pool to be returned to the caller ... */ + if (hash_cb->hash_final(shash, aux_output) || + /* + * ... and re-initialize the aux state. Do not add the aux pool + * digest for backward secrecy as it will be added with the + * insertion of the complete seed buffer after it has been filled. + */ + hash_cb->hash_init(shash, hash)) { + returned_ent_bits = 0; + } else { + /* + * Do not truncate the output size exactly to collected_ent_bits + * as the aux pool may contain data that is not credited with + * entropy, but we want to use them to stir the DRNG state. 
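A worked example of the accounting above, assuming FIPS mode with an oversampling rate of 64 bits (the value of LRNG_OVERSAMPLE_ES_BITS is an assumption here), a 512-bit digest, requested_bits = 256 and 200 bits currently credited to the pool:

	requested_bits_osr = 256 + 64            = 320
	collected_ent_bits = min(512, 200)       = 200   (nothing is put back)
	returned_ent_bits  = 200 - 64            = 136
	bytes copied       = requested_bits >> 3 = 32

Outside FIPS mode the oversampling rate is 0, so all 200 collected bits would be credited.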
+ */ + memcpy(outbuf, aux_output, requested_bits >> 3); + } + + read_unlock_irqrestore(&drng->hash_lock, flags); + memzero_explicit(aux_output, digestsize); + return returned_ent_bits; +} + +static void lrng_aux_get_backtrack(struct entropy_buf *eb, u32 requested_bits, + bool __unused) +{ + struct lrng_pool *pool = &lrng_pool; + unsigned long flags; + + /* Ensure aux pool extraction and backtracking op are atomic */ + spin_lock_irqsave(&pool->lock, flags); + + eb->e_bits[lrng_ext_es_aux] = lrng_aux_get_pool(eb->e[lrng_ext_es_aux], + requested_bits); + + /* Mix the extracted data back into pool for backtracking resistance */ + if (lrng_aux_pool_insert_locked((u8 *)eb, + sizeof(struct entropy_buf), 0)) + pr_warn("Backtracking resistance operation failed\n"); + + spin_unlock_irqrestore(&pool->lock, flags); +} + +static void lrng_aux_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + " Hash for operating entropy pool: %s\n" + " Available entropy: %u\n", + lrng_drng_init->hash_cb->hash_name(), + lrng_aux_avail_entropy(0)); +} + +struct lrng_es_cb lrng_es_aux = { + .name = "Auxiliary", + .get_ent = lrng_aux_get_backtrack, + .curr_entropy = lrng_aux_avail_entropy, + .max_entropy = lrng_get_digestsize, + .state = lrng_aux_es_state, + .reset = lrng_aux_reset, + .switch_hash = lrng_aux_switch_hash, +}; diff --git a/drivers/char/lrng/lrng_es_aux.h b/drivers/char/lrng/lrng_es_aux.h new file mode 100644 index 0000000000000..bc41e6474aad2 --- /dev/null +++ b/drivers/char/lrng/lrng_es_aux.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_AUX_H +#define _LRNG_ES_AUX_H + +#include "lrng_drng_mgr.h" +#include "lrng_es_mgr_cb.h" + +u32 lrng_get_digestsize(void); +void lrng_pool_set_entropy(u32 entropy_bits); +int lrng_pool_insert_aux(const u8 *inbuf, u32 inbuflen, u32 entropy_bits); + +extern struct lrng_es_cb lrng_es_aux; + +/****************************** Helper code ***********************************/ + +/* Obtain the security strength of the LRNG in bits */ +static inline u32 lrng_security_strength(void) +{ + /* + * We use a hash to read the entropy in the entropy pool. According to + * SP800-90B table 1, the entropy can be at most the digest size. + * Considering this together with the last sentence in section 3.1.5.1.2 + * the security strength of a (approved) hash is equal to its output + * size. On the other hand the entropy cannot be larger than the + * security strength of the used DRBG. 
+ */ + return min_t(u32, LRNG_FULL_SEED_ENTROPY_BITS, lrng_get_digestsize()); +} + +static inline u32 lrng_get_seed_entropy_osr(bool fully_seeded) +{ + u32 requested_bits = lrng_security_strength(); + + /* Apply oversampling during initialization according to SP800-90C */ + if (lrng_sp80090c_compliant() && !fully_seeded) + requested_bits += LRNG_SEED_BUFFER_INIT_ADD_BITS; + return requested_bits; +} + +#endif /* _LRNG_ES_AUX_H */ diff --git a/drivers/char/lrng/lrng_es_cpu.c b/drivers/char/lrng/lrng_es_cpu.c new file mode 100644 index 0000000000000..3910108cfd088 --- /dev/null +++ b/drivers/char/lrng/lrng_es_cpu.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Fast Entropy Source: CPU-based entropy source + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_definitions.h" +#include "lrng_es_aux.h" +#include "lrng_es_cpu.h" + +/* + * Estimated entropy of data is a 32th of LRNG_DRNG_SECURITY_STRENGTH_BITS. + * As we have no ability to review the implementation of those noise sources, + * it is prudent to have a conservative estimate here. + */ +#define LRNG_ARCHRANDOM_DEFAULT_STRENGTH CONFIG_LRNG_CPU_ENTROPY_RATE +#define LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH LRNG_DRNG_SECURITY_STRENGTH_BITS +#ifdef CONFIG_RANDOM_TRUST_CPU +static u32 cpu_entropy = LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH; +#else +static u32 cpu_entropy = LRNG_ARCHRANDOM_DEFAULT_STRENGTH; +#endif +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(cpu_entropy, uint, 0644); +MODULE_PARM_DESC(cpu_entropy, "Entropy in bits of 256 data bits from CPU noise source (e.g. RDSEED)"); +#endif + +static int __init lrng_parse_trust_cpu(char *arg) +{ + int ret; + bool trust_cpu = false; + + ret = kstrtobool(arg, &trust_cpu); + if (ret) + return ret; + + if (trust_cpu) { + cpu_entropy = LRNG_ARCHRANDOM_TRUST_CPU_STRENGTH; + lrng_es_add_entropy(); + } else { + cpu_entropy = LRNG_ARCHRANDOM_DEFAULT_STRENGTH; + } + + return 0; +} +early_param("random.trust_cpu", lrng_parse_trust_cpu); + +static u32 lrng_cpu_entropylevel(u32 requested_bits) +{ + return lrng_fast_noise_entropylevel(cpu_entropy, requested_bits); +} + +static u32 lrng_cpu_poolsize(void) +{ + return lrng_cpu_entropylevel(lrng_security_strength()); +} + +static u32 lrng_get_cpu_data(u8 *outbuf, u32 requested_bits) +{ + u32 i; + + /* operate on full blocks */ + BUILD_BUG_ON(LRNG_DRNG_SECURITY_STRENGTH_BYTES % sizeof(unsigned long)); + BUILD_BUG_ON(LRNG_SEED_BUFFER_INIT_ADD_BITS % sizeof(unsigned long)); + /* ensure we have aligned buffers */ + BUILD_BUG_ON(LRNG_KCAPI_ALIGN % sizeof(unsigned long)); + + for (i = 0; i < (requested_bits >> 3); + i += sizeof(unsigned long)) { + if (!arch_get_random_seed_long((unsigned long *)(outbuf + i)) && + !arch_get_random_long((unsigned long *)(outbuf + i))) { + cpu_entropy = 0; + return 0; + } + } + + return requested_bits; +} + +static u32 lrng_get_cpu_data_compress(u8 *outbuf, u32 requested_bits, + u32 data_multiplier) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_hash_cb *hash_cb; + struct lrng_drng *drng = lrng_drng_node_instance(); + unsigned long flags; + u32 ent_bits = 0, i, partial_bits = 0, digestsize, digestsize_bits, + full_bits; + void *hash; + + read_lock_irqsave(&drng->hash_lock, flags); + hash_cb = drng->hash_cb; + hash = drng->hash; + + digestsize = hash_cb->hash_digestsize(hash); + digestsize_bits = digestsize << 3; + /* Cap to maximum entropy that can ever be generated with given hash */ 
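A worked illustration of the block accounting that follows (illustrative values: requested_bits = 256, data_multiplier = 512 as on an RDRAND-only x86 CPU, and LRNG_OVERSAMPLE_ES_BITS assumed to be 64 in SP800-90C mode):

	full_bits       = 256 * 512          = 131072
	full_bits      += 64 * 512           = 163840
	data_multiplier = 163840 / 256       = 640
	partial_bits    = 163840 - 640 * 256 = 0

That is, 640 full 256-bit reads are pulled from the CPU and hashed; without SP800-90C compliance the plain 512 reads would be used.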
+ lrng_cap_requested(digestsize_bits, requested_bits); + full_bits = requested_bits * data_multiplier; + + /* Calculate oversampling for SP800-90C */ + if (lrng_sp80090c_compliant()) { + /* Complete amount of bits to be pulled */ + full_bits += LRNG_OVERSAMPLE_ES_BITS * data_multiplier; + /* Full blocks that will be pulled */ + data_multiplier = full_bits / requested_bits; + /* Partial block in bits to be pulled */ + partial_bits = full_bits - (data_multiplier * requested_bits); + } + + if (hash_cb->hash_init(shash, hash)) + goto out; + + /* Hash all data from the CPU entropy source */ + for (i = 0; i < data_multiplier; i++) { + ent_bits = lrng_get_cpu_data(outbuf, requested_bits); + if (!ent_bits) + goto out; + + if (hash_cb->hash_update(shash, outbuf, ent_bits >> 3)) + goto err; + } + + /* Hash partial block, if applicable */ + ent_bits = lrng_get_cpu_data(outbuf, partial_bits); + if (ent_bits && + hash_cb->hash_update(shash, outbuf, ent_bits >> 3)) + goto err; + + pr_debug("pulled %u bits from CPU RNG entropy source\n", full_bits); + ent_bits = requested_bits; + + /* Generate the compressed data to be returned to the caller */ + if (requested_bits < digestsize_bits) { + u8 digest[LRNG_MAX_DIGESTSIZE]; + + if (hash_cb->hash_final(shash, digest)) + goto err; + + /* Truncate output data to requested size */ + memcpy(outbuf, digest, requested_bits >> 3); + memzero_explicit(digest, digestsize); + } else { + if (hash_cb->hash_final(shash, outbuf)) + goto err; + } + +out: + hash_cb->hash_desc_zero(shash); + read_unlock_irqrestore(&drng->hash_lock, flags); + return ent_bits; + +err: + ent_bits = 0; + goto out; +} + +/* + * If the CPU entropy source does not return full entropy, return the + * multiplier of how much data shall be sampled from it. + */ +static u32 lrng_cpu_multiplier(void) +{ + static u32 data_multiplier = 0; + unsigned long v; + + if (data_multiplier > 0) + return data_multiplier; + + if (IS_ENABLED(CONFIG_X86) && !arch_get_random_seed_long(&v)) { + /* + * Intel SPEC: pulling 512 blocks from RDRAND ensures + * one reseed, making it logically equivalent to RDSEED. + */ + data_multiplier = 512; + } else if (IS_ENABLED(CONFIG_PPC)) { + /* + * PowerISA defines DARN to deliver at least 0.5 bits of + * entropy per data bit. + */ + data_multiplier = 2; + } else if (IS_ENABLED(CONFIG_RISCV)) { + /* + * riscv-crypto-spec-scalar-1.0.0-rc6.pdf section 4.2 defines + * this requirement. + */ + data_multiplier = 2; + } else { + /* CPU provides full entropy */ + data_multiplier = CONFIG_LRNG_CPU_FULL_ENT_MULTIPLIER; + } + return data_multiplier; +} + +static int +lrng_cpu_switch_hash(struct lrng_drng *drng, int node, + const struct lrng_hash_cb *new_cb, void *new_hash, + const struct lrng_hash_cb *old_cb) +{ + u32 digestsize, multiplier; + + if (!IS_ENABLED(CONFIG_LRNG_SWITCH)) + return -EOPNOTSUPP; + + digestsize = lrng_get_digestsize(); + multiplier = lrng_cpu_multiplier(); + + /* + * It would be a security violation if the new digestsize is smaller than + * the set CPU entropy rate.
+ */ + WARN_ON(multiplier > 1 && digestsize < cpu_entropy); + cpu_entropy = min_t(u32, digestsize, cpu_entropy); + return 0; +} + +/* + * lrng_get_arch() - Get CPU entropy source entropy + * + * @eb: entropy buffer to store entropy + * @requested_bits: requested entropy in bits + */ +static void lrng_cpu_get(struct entropy_buf *eb, u32 requested_bits, + bool __unused) +{ + u32 ent_bits, data_multiplier = lrng_cpu_multiplier(); + + if (data_multiplier <= 1) { + ent_bits = lrng_get_cpu_data(eb->e[lrng_ext_es_cpu], + requested_bits); + } else { + ent_bits = lrng_get_cpu_data_compress(eb->e[lrng_ext_es_cpu], + requested_bits, + data_multiplier); + } + + ent_bits = lrng_cpu_entropylevel(ent_bits); + pr_debug("obtained %u bits of entropy from CPU RNG entropy source\n", + ent_bits); + eb->e_bits[lrng_ext_es_cpu] = ent_bits; +} + +static void lrng_cpu_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + u32 data_multiplier = lrng_cpu_multiplier(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + " Hash for compressing data: %s\n" + " Available entropy: %u\n" + " Data multiplier: %u\n", + (data_multiplier <= 1) ? + "N/A" : lrng_drng_init->hash_cb->hash_name(), + lrng_cpu_poolsize(), + data_multiplier); +} + +struct lrng_es_cb lrng_es_cpu = { + .name = "CPU", + .get_ent = lrng_cpu_get, + .curr_entropy = lrng_cpu_entropylevel, + .max_entropy = lrng_cpu_poolsize, + .state = lrng_cpu_es_state, + .reset = NULL, + .switch_hash = lrng_cpu_switch_hash, +}; diff --git a/drivers/char/lrng/lrng_es_cpu.h b/drivers/char/lrng/lrng_es_cpu.h new file mode 100644 index 0000000000000..8dbb4d9a2926c --- /dev/null +++ b/drivers/char/lrng/lrng_es_cpu.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_CPU_H +#define _LRNG_ES_CPU_H + +#include "lrng_es_mgr_cb.h" + +#ifdef CONFIG_LRNG_CPU + +extern struct lrng_es_cb lrng_es_cpu; + +#endif /* CONFIG_LRNG_CPU */ + +#endif /* _LRNG_ES_CPU_H */ diff --git a/drivers/char/lrng/lrng_es_irq.c b/drivers/char/lrng/lrng_es_irq.c new file mode 100644 index 0000000000000..9336d0045e06d --- /dev/null +++ b/drivers/char/lrng/lrng_es_irq.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Interrupt data collection + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "lrng_es_aux.h" +#include "lrng_es_irq.h" +#include "lrng_es_timer_common.h" +#include "lrng_health.h" +#include "lrng_numa.h" +#include "lrng_testing.h" + +/* + * Number of interrupts to be recorded to assume that DRNG security strength + * bits of entropy are received. + * Note: a value below the DRNG security strength should not be defined as this + * may imply the DRNG can never be fully seeded in case other noise + * sources are unavailable. 
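As a numeric illustration of this rate (the concrete Kconfig default is not shown in this hunk, and linear crediting as performed by lrng_data_to_entropy() is assumed): with CONFIG_LRNG_IRQ_ENTROPY_RATE = 256, each interrupt is credited one bit of entropy; with 512, half a bit:

	256 events * 256 / 256 = 256 bits of credited entropy
	256 events * 256 / 512 = 128 bits of credited entropy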
+ */ +#define LRNG_IRQ_ENTROPY_BITS LRNG_UINT32_C(CONFIG_LRNG_IRQ_ENTROPY_RATE) + + +/* Number of interrupts required for LRNG_DRNG_SECURITY_STRENGTH_BITS entropy */ +static u32 lrng_irq_entropy_bits = LRNG_IRQ_ENTROPY_BITS; + +static u32 irq_entropy __read_mostly = LRNG_IRQ_ENTROPY_BITS; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(irq_entropy, uint, 0444); +MODULE_PARM_DESC(irq_entropy, + "How many interrupts must be collected for obtaining 256 bits of entropy\n"); +#endif + +/* Per-CPU array holding concatenated IRQ entropy events */ +static DEFINE_PER_CPU(u32 [LRNG_DATA_ARRAY_SIZE], lrng_irq_array) + __aligned(LRNG_KCAPI_ALIGN); +static DEFINE_PER_CPU(u32, lrng_irq_array_ptr) = 0; +static DEFINE_PER_CPU(atomic_t, lrng_irq_array_irqs) = ATOMIC_INIT(0); + +/* + * The entropy collection is performed by executing the following steps: + * 1. fill up the per-CPU array holding the time stamps + * 2. once the per-CPU array is full, a compression of the data into + * the entropy pool is performed - this happens in interrupt context + * + * If step 2 is not desired in interrupt context, the following boolean + * needs to be set to false. This implies that old entropy data in the + * per-CPU array collected since the last DRNG reseed is overwritten with + * new entropy data instead of retaining the entropy with the compression + * operation. + * + * Impact on entropy: + * + * If continuous compression is enabled, the maximum entropy that is collected + * per CPU between DRNG reseeds is equal to the digest size of the used hash. + * + * If continuous compression is disabled, the maximum number of entropy events + * that can be collected per CPU is equal to LRNG_DATA_ARRAY_SIZE. This amount + * of events is converted into an entropy statement which then represents the + * maximum amount of entropy collectible per CPU between DRNG reseeds. + */ +static bool lrng_irq_continuous_compression __read_mostly = + IS_ENABLED(CONFIG_LRNG_ENABLE_CONTINUOUS_COMPRESSION); + +#ifdef CONFIG_LRNG_SWITCHABLE_CONTINUOUS_COMPRESSION +module_param(lrng_irq_continuous_compression, bool, 0444); +MODULE_PARM_DESC(lrng_irq_continuous_compression, + "Perform entropy compression if per-CPU entropy data array is full\n"); +#endif + +/* + * Per-CPU entropy pool with compressed entropy event + * + * The per-CPU entropy pool is defined as the hash state. New data is simply + * inserted into the entropy pool by performing a hash update operation. + * To read the entropy pool, a hash final must be invoked. However, before + * the entropy pool is released again after a hash final, the hash init must + * be performed. + */ +static DEFINE_PER_CPU(u8 [LRNG_POOL_SIZE], lrng_irq_pool) + __aligned(LRNG_KCAPI_ALIGN); +/* + * Lock to allow other CPUs to read the pool - as this is only done during + * reseed which is infrequent, this lock is hardly contended. 
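A condensed sketch of that life cycle, using the lrng_hash_cb callbacks the same way lrng_irq_pool_hash_one() below chains them (illustrative only; error handling via the ?: chain as elsewhere in this file):

static int example_read_percpu_pool(const struct lrng_hash_cb *hash_cb,
				    struct shash_desc *shash, void *hash,
				    const u8 *events, u32 events_len,
				    u8 *digest, u32 digestsize)
{
	return hash_cb->hash_update(shash, events, events_len) ?:	/* absorb events */
	       hash_cb->hash_final(shash, digest) ?:			/* read the pool */
	       hash_cb->hash_init(shash, hash) ?:			/* re-init state */
	       hash_cb->hash_update(shash, digest, digestsize);	/* retain entropy */
}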
+ */ +static DEFINE_PER_CPU(spinlock_t, lrng_irq_lock); +static DEFINE_PER_CPU(bool, lrng_irq_lock_init) = false; + +static bool lrng_irq_pool_online(int cpu) +{ + return per_cpu(lrng_irq_lock_init, cpu); +} + +static void __init lrng_irq_check_compression_state(void) +{ + /* One pool must hold sufficient entropy for disabled compression */ + if (!lrng_irq_continuous_compression) { + u32 max_ent = min_t(u32, lrng_get_digestsize(), + lrng_data_to_entropy(LRNG_DATA_NUM_VALUES, + lrng_irq_entropy_bits)); + if (max_ent < lrng_security_strength()) { + pr_warn("Force continuous compression operation to ensure LRNG can hold enough entropy\n"); + lrng_irq_continuous_compression = true; + } + } +} + +void __init lrng_irq_es_init(bool highres_timer) +{ + /* Set a minimum number of interrupts that must be collected */ + irq_entropy = max_t(u32, LRNG_IRQ_ENTROPY_BITS, irq_entropy); + + if (highres_timer) { + lrng_irq_entropy_bits = irq_entropy; + } else { + u32 new_entropy = irq_entropy * LRNG_ES_OVERSAMPLING_FACTOR; + + lrng_irq_entropy_bits = (irq_entropy < new_entropy) ? + new_entropy : irq_entropy; + pr_warn("operating without high-resolution timer and applying IRQ oversampling factor %u\n", + LRNG_ES_OVERSAMPLING_FACTOR); + } + + lrng_irq_check_compression_state(); +} + +/* + * Reset all per-CPU pools - reset entropy estimator but leave the pool data + * that may or may not have entropy unchanged. + */ +static void lrng_irq_reset(void) +{ + int cpu; + + /* Trigger GCD calculation anew. */ + lrng_gcd_set(0); + + for_each_online_cpu(cpu) + atomic_set(per_cpu_ptr(&lrng_irq_array_irqs, cpu), 0); +} + +static u32 lrng_irq_avail_pool_size(void) +{ + u32 max_size = 0, max_pool = lrng_get_digestsize(); + int cpu; + + if (!lrng_irq_continuous_compression) + max_pool = min_t(u32, max_pool, LRNG_DATA_NUM_VALUES); + + for_each_online_cpu(cpu) { + if (lrng_irq_pool_online(cpu)) + max_size += max_pool; + } + + return max_size; +} + +/* Return entropy of unused IRQs present in all per-CPU pools. */ +static u32 lrng_irq_avail_entropy(u32 __unused) +{ + u32 digestsize_irqs, irq = 0; + int cpu; + + /* Only deliver entropy when SP800-90B self test is completed */ + if (!lrng_sp80090b_startup_complete_es(lrng_int_es_irq)) + return 0; + + /* Obtain the cap of maximum numbers of IRQs we count */ + digestsize_irqs = lrng_entropy_to_data(lrng_get_digestsize(), + lrng_irq_entropy_bits); + if (!lrng_irq_continuous_compression) { + /* Cap to max. number of IRQs the array can hold */ + digestsize_irqs = min_t(u32, digestsize_irqs, + LRNG_DATA_NUM_VALUES); + } + + for_each_online_cpu(cpu) { + if (!lrng_irq_pool_online(cpu)) + continue; + irq += min_t(u32, digestsize_irqs, + atomic_read_u32(per_cpu_ptr(&lrng_irq_array_irqs, + cpu))); + } + + /* Consider oversampling rate */ + return lrng_reduce_by_osr(lrng_data_to_entropy(irq, + lrng_irq_entropy_bits)); +} + +/* + * Trigger a switch of the hash implementation for the per-CPU pool. + * + * For each per-CPU pool, obtain the message digest with the old hash + * implementation, initialize the per-CPU pool again with the new hash + * implementation and inject the message digest into the new state. + * + * Assumption: the caller must guarantee that the new_cb is available during the + * entire operation (e.g. it must hold the lock against pointer updating). 
+ */ +static int +lrng_irq_switch_hash(struct lrng_drng *drng, int node, + const struct lrng_hash_cb *new_cb, void *new_hash, + const struct lrng_hash_cb *old_cb) +{ + u8 digest[LRNG_MAX_DIGESTSIZE]; + u32 digestsize_irqs, found_irqs; + int ret = 0, cpu; + + if (!IS_ENABLED(CONFIG_LRNG_SWITCH)) + return -EOPNOTSUPP; + + for_each_online_cpu(cpu) { + struct shash_desc *pcpu_shash; + + /* + * Only switch the per-CPU pools for the current node because + * the hash_cb only applies NUMA-node-wide. + */ + if (cpu_to_node(cpu) != node || !lrng_irq_pool_online(cpu)) + continue; + + pcpu_shash = (struct shash_desc *)per_cpu_ptr(lrng_irq_pool, + cpu); + + digestsize_irqs = old_cb->hash_digestsize(pcpu_shash); + digestsize_irqs = lrng_entropy_to_data(digestsize_irqs << 3, + lrng_irq_entropy_bits); + + if (pcpu_shash->tfm == new_hash) + continue; + + /* Get the per-CPU pool hash with old digest ... */ + ret = old_cb->hash_final(pcpu_shash, digest) ?: + /* ... re-initialize the hash with the new digest ... */ + new_cb->hash_init(pcpu_shash, new_hash) ?: + /* + * ... feed the old hash into the new state. We may feed + * uninitialized memory into the new state, but this is + * considered no issue and even good as we have some more + * uncertainty here. + */ + new_cb->hash_update(pcpu_shash, digest, + sizeof(digest)); + if (ret) + goto out; + + /* + * In case the new digest is larger than the old one, cap + * the available entropy to the old message digest used to + * process the existing data. + */ + found_irqs = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_irq_array_irqs, cpu), 0); + found_irqs = min_t(u32, found_irqs, digestsize_irqs); + atomic_add_return_relaxed(found_irqs, + per_cpu_ptr(&lrng_irq_array_irqs, cpu)); + + pr_debug("Re-initialize per-CPU entropy pool for CPU %d on NUMA node %d with hash %s\n", + cpu, node, new_cb->hash_name()); + } + +out: + memzero_explicit(digest, sizeof(digest)); + return ret; +} + +/* + * When reading the per-CPU message digest, make sure we use the crypto + * callbacks defined for the NUMA node the per-CPU pool is defined for because + * the LRNG crypto switch support is only atomic per NUMA node. + */ +static u32 +lrng_irq_pool_hash_one(const struct lrng_hash_cb *pcpu_hash_cb, + void *pcpu_hash, int cpu, u8 *digest, u32 *digestsize) +{ + struct shash_desc *pcpu_shash = + (struct shash_desc *)per_cpu_ptr(lrng_irq_pool, cpu); + spinlock_t *lock = per_cpu_ptr(&lrng_irq_lock, cpu); + unsigned long flags; + u32 digestsize_irqs, found_irqs; + + /* Lock guarding against reading / writing to per-CPU pool */ + spin_lock_irqsave(lock, flags); + + *digestsize = pcpu_hash_cb->hash_digestsize(pcpu_hash); + digestsize_irqs = lrng_entropy_to_data(*digestsize << 3, + lrng_irq_entropy_bits); + + /* Obtain entropy statement like for the entropy pool */ + found_irqs = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_irq_array_irqs, cpu), 0); + /* Cap to maximum amount of data we can hold in hash */ + found_irqs = min_t(u32, found_irqs, digestsize_irqs); + + /* Cap to maximum amount of data we can hold in array */ + if (!lrng_irq_continuous_compression) + found_irqs = min_t(u32, found_irqs, LRNG_DATA_NUM_VALUES); + + /* Store all not-yet compressed data in data array into hash, ... */ + if (pcpu_hash_cb->hash_update(pcpu_shash, + (u8 *)per_cpu_ptr(lrng_irq_array, cpu), + LRNG_DATA_ARRAY_SIZE * sizeof(u32)) ?: + /* ... get the per-CPU pool digest, ... */ + pcpu_hash_cb->hash_final(pcpu_shash, digest) ?: + /* ... re-initialize the hash, ... 
*/ + pcpu_hash_cb->hash_init(pcpu_shash, pcpu_hash) ?: + /* ... feed the old hash into the new state. */ + pcpu_hash_cb->hash_update(pcpu_shash, digest, *digestsize)) + found_irqs = 0; + + spin_unlock_irqrestore(lock, flags); + return found_irqs; +} + +/* + * Hash all per-CPU pools and return the digest to be used as seed data for + * seeding a DRNG. The caller must guarantee backtracking resistance. + * The function will only copy as much data as entropy is available into the + * caller-provided output buffer. + * + * This function handles the translation from the number of received interrupts + * into an entropy statement. The conversion depends on LRNG_IRQ_ENTROPY_BITS + * which defines how many interrupts must be received to obtain 256 bits of + * entropy. With this value, the function lrng_data_to_entropy converts a given + * data size (received interrupts, requested amount of data, etc.) into an + * entropy statement. lrng_entropy_to_data does the reverse. + * + * @eb: entropy buffer to store entropy + * @requested_bits: Requested amount of entropy + * @fully_seeded: indicator whether LRNG is fully seeded + */ +static void lrng_irq_pool_hash(struct entropy_buf *eb, u32 requested_bits, + bool fully_seeded) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_hash_cb *hash_cb; + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *drng = lrng_drng_init_instance(); + u8 digest[LRNG_MAX_DIGESTSIZE]; + unsigned long flags, flags2; + u32 found_irqs, collected_irqs = 0, collected_ent_bits, requested_irqs, + returned_ent_bits; + int ret, cpu; + void *hash; + + /* Only deliver entropy when SP800-90B self test is completed */ + if (!lrng_sp80090b_startup_complete_es(lrng_int_es_irq)) { + eb->e_bits[lrng_int_es_irq] = 0; + return; + } + + /* Lock guarding replacement of per-NUMA hash */ + read_lock_irqsave(&drng->hash_lock, flags); + + hash_cb = drng->hash_cb; + hash = drng->hash; + + /* The hash state of filled with all per-CPU pool hashes. */ + ret = hash_cb->hash_init(shash, hash); + if (ret) + goto err; + + /* Cap to maximum entropy that can ever be generated with given hash */ + lrng_cap_requested(hash_cb->hash_digestsize(hash) << 3, requested_bits); + requested_irqs = lrng_entropy_to_data(requested_bits + + lrng_compress_osr(), + lrng_irq_entropy_bits); + + /* + * Harvest entropy from each per-CPU hash state - even though we may + * have collected sufficient entropy, we will hash all per-CPU pools. + */ + for_each_online_cpu(cpu) { + struct lrng_drng *pcpu_drng = drng; + u32 digestsize, pcpu_unused_irqs = 0; + int node = cpu_to_node(cpu); + + /* If pool is not online, then no entropy is present. 
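+ * A per-CPU pool is considered online once lrng_irq_array_compress() has
+ * initialized the per-CPU lock and hash state on that CPU.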
*/ + if (!lrng_irq_pool_online(cpu)) + continue; + + if (lrng_drng && lrng_drng[node]) + pcpu_drng = lrng_drng[node]; + + if (pcpu_drng == drng) { + found_irqs = lrng_irq_pool_hash_one(hash_cb, hash, + cpu, digest, + &digestsize); + } else { + read_lock_irqsave(&pcpu_drng->hash_lock, flags2); + found_irqs = + lrng_irq_pool_hash_one(pcpu_drng->hash_cb, + pcpu_drng->hash, cpu, + digest, &digestsize); + read_unlock_irqrestore(&pcpu_drng->hash_lock, flags2); + } + + /* Inject the digest into the state of all per-CPU pools */ + ret = hash_cb->hash_update(shash, digest, digestsize); + if (ret) + goto err; + + collected_irqs += found_irqs; + if (collected_irqs > requested_irqs) { + pcpu_unused_irqs = collected_irqs - requested_irqs; + atomic_add_return_relaxed(pcpu_unused_irqs, + per_cpu_ptr(&lrng_irq_array_irqs, cpu)); + collected_irqs = requested_irqs; + } + pr_debug("%u interrupts used from entropy pool of CPU %d, %u interrupts remain unused\n", + found_irqs - pcpu_unused_irqs, cpu, pcpu_unused_irqs); + } + + ret = hash_cb->hash_final(shash, digest); + if (ret) + goto err; + + collected_ent_bits = lrng_data_to_entropy(collected_irqs, + lrng_irq_entropy_bits); + /* Apply oversampling: discount requested oversampling rate */ + returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits); + + pr_debug("obtained %u bits by collecting %u bits of entropy from entropy pool noise source\n", + returned_ent_bits, collected_ent_bits); + + /* + * Truncate to available entropy as implicitly allowed by SP800-90B + * section 3.1.5.1.1 table 1 which awards truncated hashes full + * entropy. + * + * During boot time, we read requested_bits data with + * returned_ent_bits entropy. In case our conservative entropy + * estimate underestimates the available entropy we can transport as + * much available entropy as possible. + */ + memcpy(eb->e[lrng_int_es_irq], digest, + fully_seeded ? returned_ent_bits >> 3 : requested_bits >> 3); + eb->e_bits[lrng_int_es_irq] = returned_ent_bits; + +out: + hash_cb->hash_desc_zero(shash); + read_unlock_irqrestore(&drng->hash_lock, flags); + memzero_explicit(digest, sizeof(digest)); + return; + +err: + eb->e_bits[lrng_int_es_irq] = 0; + goto out; +} + +/* Compress the lrng_irq_array array into lrng_irq_pool */ +static void lrng_irq_array_compress(void) +{ + struct shash_desc *shash = + (struct shash_desc *)this_cpu_ptr(lrng_irq_pool); + struct lrng_drng *drng = lrng_drng_node_instance(); + const struct lrng_hash_cb *hash_cb; + spinlock_t *lock = this_cpu_ptr(&lrng_irq_lock); + unsigned long flags, flags2; + void *hash; + bool init = false; + + read_lock_irqsave(&drng->hash_lock, flags); + hash_cb = drng->hash_cb; + hash = drng->hash; + + if (unlikely(!this_cpu_read(lrng_irq_lock_init))) { + init = true; + spin_lock_init(lock); + this_cpu_write(lrng_irq_lock_init, true); + pr_debug("Initializing per-CPU entropy pool for CPU %d on NUMA node %d with hash %s\n", + raw_smp_processor_id(), numa_node_id(), + hash_cb->hash_name()); + } + + spin_lock_irqsave(lock, flags2); + + if (unlikely(init) && hash_cb->hash_init(shash, hash)) { + this_cpu_write(lrng_irq_lock_init, false); + pr_warn("Initialization of hash failed\n"); + } else if (lrng_irq_continuous_compression) { + /* Add entire per-CPU data array content into entropy pool. 
*/ + if (hash_cb->hash_update(shash, + (u8 *)this_cpu_ptr(lrng_irq_array), + LRNG_DATA_ARRAY_SIZE * sizeof(u32))) + pr_warn_ratelimited("Hashing of entropy data failed\n"); + } + + spin_unlock_irqrestore(lock, flags2); + read_unlock_irqrestore(&drng->hash_lock, flags); +} + +/* Compress data array into hash */ +static void lrng_irq_array_to_hash(u32 ptr) +{ + u32 *array = this_cpu_ptr(lrng_irq_array); + + /* + * During boot time the hash operation is triggered more often than + * during regular operation. + */ + if (unlikely(!lrng_state_fully_seeded())) { + if ((ptr & 31) && (ptr < LRNG_DATA_WORD_MASK)) + return; + } else if (ptr < LRNG_DATA_WORD_MASK) { + return; + } + + if (lrng_raw_array_entropy_store(*array)) { + u32 i; + + /* + * If we fed even a part of the array to external analysis, we + * mark the entire array and the per-CPU pool as having no + * entropy. This is due to the non-IID property of the data as + * we do not fully know whether the existing dependencies + * diminish the entropy beyond what we expect it to have. + */ + atomic_set(this_cpu_ptr(&lrng_irq_array_irqs), 0); + + for (i = 1; i < LRNG_DATA_ARRAY_SIZE; i++) + lrng_raw_array_entropy_store(*(array + i)); + } else { + lrng_irq_array_compress(); + /* Ping pool handler about received entropy */ + if (lrng_sp80090b_startup_complete_es(lrng_int_es_irq)) + lrng_es_add_entropy(); + } +} + +/* + * Concatenate a full 32-bit word at the end of the time array even when the + * current ptr is not aligned to sizeof(data). + */ +static void _lrng_irq_array_add_u32(u32 data) +{ + /* Increment pointer by number of slots taken for input value */ + u32 pre_ptr, mask, ptr = this_cpu_add_return(lrng_irq_array_ptr, + LRNG_DATA_SLOTS_PER_UINT); + unsigned int pre_array; + + /* + * This function injects a unit into the array - guarantee that the + * array unit size is equal to the data type of the input data. + */ + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS != (sizeof(data) << 3)); + + /* + * The following logic requires at least two units holding + * the data as otherwise the pointer would immediately wrap when + * injecting a u32 word. + */ + BUILD_BUG_ON(LRNG_DATA_NUM_VALUES <= LRNG_DATA_SLOTS_PER_UINT); + + lrng_data_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_irq_array[pre_array], ~(0xffffffff & ~mask)); + this_cpu_or(lrng_irq_array[pre_array], data & ~mask); + + /* Invoke compression as we just filled data array completely */ + if (unlikely(pre_ptr > ptr)) + lrng_irq_array_to_hash(LRNG_DATA_WORD_MASK); + + /* LSB of data go into current unit */ + this_cpu_write(lrng_irq_array[lrng_data_idx2array(ptr)], + data & mask); + + if (likely(pre_ptr <= ptr)) + lrng_irq_array_to_hash(ptr); +} + +/* Concatenate a 32-bit word at the end of the per-CPU array */ +void lrng_irq_array_add_u32(u32 data) +{ + /* + * Disregard entropy-less data without continuous compression to + * avoid it overwriting data with entropy when the array ptr wraps.
+ */ + if (lrng_irq_continuous_compression) + _lrng_irq_array_add_u32(data); +} + +/* Concatenate data of max LRNG_DATA_SLOTSIZE_MASK at the end of time array */ +static void lrng_irq_array_add_slot(u32 data) +{ + /* Get slot */ + u32 ptr = this_cpu_inc_return(lrng_irq_array_ptr) & + LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS % LRNG_DATA_SLOTSIZE_BITS); + /* Ensure consistency of values */ + BUILD_BUG_ON(LRNG_DATA_ARRAY_MEMBER_BITS != + sizeof(lrng_irq_array[0]) << 3); + + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_irq_array[array], + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot))); + /* Store data into slot */ + this_cpu_or(lrng_irq_array[array], lrng_data_slot_val(data, slot)); + + lrng_irq_array_to_hash(ptr); +} + +static void +lrng_time_process_common(u32 time, void(*add_time)(u32 data)) +{ + enum lrng_health_res health_test; + + if (lrng_raw_hires_entropy_store(time)) + return; + + health_test = lrng_health_test(time, lrng_int_es_irq); + if (health_test > lrng_health_fail_use) + return; + + if (health_test == lrng_health_pass) + atomic_inc_return(this_cpu_ptr(&lrng_irq_array_irqs)); + + add_time(time); +} + +/* + * Batching up of entropy in per-CPU array before injecting into entropy pool. + */ +static void lrng_time_process(void) +{ + u32 now_time = random_get_entropy(); + + if (unlikely(!lrng_gcd_tested())) { + /* When GCD is unknown, we process the full time stamp */ + lrng_time_process_common(now_time, _lrng_irq_array_add_u32); + lrng_gcd_add_value(now_time); + } else { + /* GCD is known and applied */ + lrng_time_process_common((now_time / lrng_gcd_get()) & + LRNG_DATA_SLOTSIZE_MASK, + lrng_irq_array_add_slot); + } + + lrng_perf_time(now_time); +} + +/* Hot code path - Callback for interrupt handler */ +void add_interrupt_randomness(int irq) +{ + if (lrng_highres_timer()) { + lrng_time_process(); + } else { + struct pt_regs *regs = get_irq_regs(); + static atomic_t reg_idx = ATOMIC_INIT(0); + u64 ip; + u32 tmp; + + if (regs) { + u32 *ptr = (u32 *)regs; + int reg_ptr = atomic_add_return_relaxed(1, ®_idx); + size_t n = (sizeof(struct pt_regs) / sizeof(u32)); + + ip = instruction_pointer(regs); + tmp = *(ptr + (reg_ptr % n)); + tmp = lrng_raw_regs_entropy_store(tmp) ? 0 : tmp; + _lrng_irq_array_add_u32(tmp); + } else { + ip = _RET_IP_; + } + + lrng_time_process(); + + /* + * The XOR operation combining the different values is not + * considered to destroy entropy since the entirety of all + * processed values delivers the entropy (and not each + * value separately of the other values). + */ + tmp = lrng_raw_jiffies_entropy_store(jiffies) ? 0 : jiffies; + tmp ^= lrng_raw_irq_entropy_store(irq) ? 0 : irq; + tmp ^= lrng_raw_retip_entropy_store(ip) ? 
0 : ip; + tmp ^= ip >> 32; + _lrng_irq_array_add_u32(tmp); + } +} +EXPORT_SYMBOL(add_interrupt_randomness); + +static void lrng_irq_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + " Hash for operating entropy pool: %s\n" + " Available entropy: %u\n" + " per-CPU interrupt collection size: %u\n" + " Standards compliance: %s\n" + " High-resolution timer: %s\n" + " Continuous compression: %s\n", + lrng_drng_init->hash_cb->hash_name(), + lrng_irq_avail_entropy(0), + LRNG_DATA_NUM_VALUES, + lrng_sp80090b_compliant(lrng_int_es_irq) ? "SP800-90B " : "", + lrng_highres_timer() ? "true" : "false", + lrng_irq_continuous_compression ? "true" : "false"); +} + +struct lrng_es_cb lrng_es_irq = { + .name = "IRQ", + .get_ent = lrng_irq_pool_hash, + .curr_entropy = lrng_irq_avail_entropy, + .max_entropy = lrng_irq_avail_pool_size, + .state = lrng_irq_es_state, + .reset = lrng_irq_reset, + .switch_hash = lrng_irq_switch_hash, +}; diff --git a/drivers/char/lrng/lrng_es_irq.h b/drivers/char/lrng/lrng_es_irq.h new file mode 100644 index 0000000000000..2cd746611cf0c --- /dev/null +++ b/drivers/char/lrng/lrng_es_irq.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_IRQ_H +#define _LRNG_ES_IRQ_H + +#include + +#include "lrng_es_mgr_cb.h" + +#ifdef CONFIG_LRNG_IRQ +void lrng_irq_es_init(bool highres_timer); +void lrng_irq_array_add_u32(u32 data); + +extern struct lrng_es_cb lrng_es_irq; + +#else /* CONFIG_LRNG_IRQ */ +static inline void lrng_irq_es_init(bool highres_timer) { } +static inline void lrng_irq_array_add_u32(u32 data) { } +#endif /* CONFIG_LRNG_IRQ */ + +#endif /* _LRNG_ES_IRQ_H */ diff --git a/drivers/char/lrng/lrng_es_jent.c b/drivers/char/lrng/lrng_es_jent.c new file mode 100644 index 0000000000000..dbb2fce591f08 --- /dev/null +++ b/drivers/char/lrng/lrng_es_jent.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Fast Entropy Source: Jitter RNG + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_definitions.h" +#include "lrng_es_aux.h" +#include "lrng_es_jent.h" + +/* + * Estimated entropy of data is a 16th of LRNG_DRNG_SECURITY_STRENGTH_BITS, + * i.e. 16 bits of entropy are credited for every 256 data bits. + * Albeit a full entropy assessment is provided for the noise source, + * indicating that it provides high entropy rates, and considering that it + * deactivates when it detects insufficient hardware, the chosen + * underestimation of entropy is considered acceptable to all reviewers. + */ +static u32 jent_entropy = CONFIG_LRNG_JENT_ENTROPY_RATE; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(jent_entropy, uint, 0644); +MODULE_PARM_DESC(jent_entropy, "Entropy in bits of 256 data bits from Jitter RNG noise source"); +#endif + +static bool lrng_jent_initialized = false; +static struct rand_data *lrng_jent_state; + +static int __init lrng_jent_initialize(void) +{ + /* Initialize the Jitter RNG after the clocksources are initialized.
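+	 * (hence the registration as a device_initcall() below)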
*/ + if (jent_entropy_init() || + (lrng_jent_state = jent_entropy_collector_alloc(1, 0)) == NULL) { + jent_entropy = 0; + pr_info("Jitter RNG unusable on current system\n"); + return 0; + } + lrng_jent_initialized = true; + pr_debug("Jitter RNG working on current system\n"); + + /* + * In FIPS mode, the Jitter RNG is defined to provide full entropy + * unless a different value has been specified at the command line + * (i.e. the user overrides the default), and the default value is + * larger than zero (if it is zero, it is assumed that an RBG2(P) or + * RBG2(NP) construction is attempted that intends to exclude the + * Jitter RNG). + */ + if (fips_enabled && + CONFIG_LRNG_JENT_ENTROPY_RATE > 0 && + jent_entropy == CONFIG_LRNG_JENT_ENTROPY_RATE) + jent_entropy = LRNG_DRNG_SECURITY_STRENGTH_BITS; + + lrng_drng_force_reseed(); + if (jent_entropy) + lrng_es_add_entropy(); + + return 0; +} +device_initcall(lrng_jent_initialize); + +static u32 lrng_jent_entropylevel(u32 requested_bits) +{ + return lrng_fast_noise_entropylevel(lrng_jent_initialized ? + jent_entropy : 0, requested_bits); +} + +static u32 lrng_jent_poolsize(void) +{ + return lrng_jent_entropylevel(lrng_security_strength()); +} + +/* + * lrng_jent_get() - Get Jitter RNG entropy + * + * @eb: entropy buffer to store entropy + * @requested_bits: requested entropy in bits + */ +static void lrng_jent_get(struct entropy_buf *eb, u32 requested_bits, + bool __unused) +{ + int ret; + u32 ent_bits = lrng_jent_entropylevel(requested_bits); + unsigned long flags; + static DEFINE_SPINLOCK(lrng_jent_lock); + + spin_lock_irqsave(&lrng_jent_lock, flags); + + if (!lrng_jent_initialized) { + spin_unlock_irqrestore(&lrng_jent_lock, flags); + goto err; + } + + ret = jent_read_entropy(lrng_jent_state, eb->e[lrng_ext_es_jitter], + requested_bits >> 3); + spin_unlock_irqrestore(&lrng_jent_lock, flags); + + if (ret) { + pr_debug("Jitter RNG failed with %d\n", ret); + goto err; + } + + pr_debug("obtained %u bits of entropy from Jitter RNG noise source\n", + ent_bits); + + eb->e_bits[lrng_ext_es_jitter] = ent_bits; + return; + +err: + eb->e_bits[lrng_ext_es_jitter] = 0; +} + +static void lrng_jent_es_state(unsigned char *buf, size_t buflen) +{ + snprintf(buf, buflen, + " Available entropy: %u\n" + " Enabled: %s\n", + lrng_jent_poolsize(), + lrng_jent_initialized ?
"true" : "false"); +} + +struct lrng_es_cb lrng_es_jent = { + .name = "JitterRNG", + .get_ent = lrng_jent_get, + .curr_entropy = lrng_jent_entropylevel, + .max_entropy = lrng_jent_poolsize, + .state = lrng_jent_es_state, + .reset = NULL, + .switch_hash = NULL, +}; diff --git a/drivers/char/lrng/lrng_es_jent.h b/drivers/char/lrng/lrng_es_jent.h new file mode 100644 index 0000000000000..32882d4bdf99b --- /dev/null +++ b/drivers/char/lrng/lrng_es_jent.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_JENT_H +#define _LRNG_ES_JENT_H + +#include "lrng_es_mgr_cb.h" + +#ifdef CONFIG_LRNG_JENT + +extern struct lrng_es_cb lrng_es_jent; + +#endif /* CONFIG_LRNG_JENT */ + +#endif /* _LRNG_ES_JENT_H */ diff --git a/drivers/char/lrng/lrng_es_krng.c b/drivers/char/lrng/lrng_es_krng.c new file mode 100644 index 0000000000000..0ef88e6c8d562 --- /dev/null +++ b/drivers/char/lrng/lrng_es_krng.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Fast Entropy Source: Linux kernel RNG (random.c) + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +#include "lrng_es_aux.h" +#include "lrng_es_krng.h" + +static u32 krng_entropy = CONFIG_LRNG_KERNEL_RNG_ENTROPY_RATE; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(krng_entropy, uint, 0644); +MODULE_PARM_DESC(krng_entropy, "Entropy in bits of 256 data bits from the kernel RNG noise source"); +#endif + +static atomic_t lrng_krng_initial_rate = ATOMIC_INIT(0); + +static struct notifier_block lrng_krng_ready = { + .notifier_call = NULL, +}; + +static u32 lrng_krng_fips_entropylevel(u32 entropylevel) +{ + return fips_enabled ? 
0 : entropylevel; +} + +static int lrng_krng_adjust_entropy(struct notifier_block *nb, + unsigned long action, void *data) +{ + u32 entropylevel; + + krng_entropy = atomic_read_u32(&lrng_krng_initial_rate); + + entropylevel = lrng_krng_fips_entropylevel(krng_entropy); + pr_debug("Kernel RNG is fully seeded, setting entropy rate to %u bits of entropy\n", + entropylevel); + lrng_drng_force_reseed(); + if (entropylevel) + lrng_es_add_entropy(); + return 0; +} + +static u32 lrng_krng_entropylevel(u32 requested_bits) +{ + if (lrng_krng_ready.notifier_call == NULL) { + int err; + + lrng_krng_ready.notifier_call = lrng_krng_adjust_entropy; + + err = register_random_ready_notifier(&lrng_krng_ready); + switch (err) { + case 0: + atomic_set(&lrng_krng_initial_rate, krng_entropy); + krng_entropy = 0; + pr_debug("Kernel RNG is not yet seeded, setting entropy rate to 0 bits of entropy\n"); + break; + + case -EALREADY: + pr_debug("Kernel RNG is fully seeded, setting entropy rate to %u bits of entropy\n", + lrng_krng_fips_entropylevel(krng_entropy)); + break; + default: + lrng_krng_ready.notifier_call = NULL; + return 0; + } + } + + return lrng_fast_noise_entropylevel( + lrng_krng_fips_entropylevel(krng_entropy), requested_bits); +} + +static u32 lrng_krng_poolsize(void) +{ + return lrng_krng_entropylevel(lrng_security_strength()); +} + +/* + * lrng_krng_get() - Get kernel RNG entropy + * + * @eb: entropy buffer to store entropy + * @requested_bits: requested entropy in bits + */ +static void lrng_krng_get(struct entropy_buf *eb, u32 requested_bits, + bool __unused) +{ + u32 ent_bits = lrng_krng_entropylevel(requested_bits); + + get_random_bytes(eb->e[lrng_ext_es_krng], requested_bits >> 3); + + pr_debug("obtained %u bits of entropy from kernel RNG noise source\n", + ent_bits); + + eb->e_bits[lrng_ext_es_krng] = ent_bits; +} + +static void lrng_krng_es_state(unsigned char *buf, size_t buflen) +{ + snprintf(buf, buflen, + " Available entropy: %u\n" + " Entropy Rate per 256 data bits: %u\n", + lrng_krng_poolsize(), + lrng_krng_entropylevel(256)); +} + +struct lrng_es_cb lrng_es_krng = { + .name = "KernelRNG", + .get_ent = lrng_krng_get, + .curr_entropy = lrng_krng_entropylevel, + .max_entropy = lrng_krng_poolsize, + .state = lrng_krng_es_state, + .reset = NULL, + .switch_hash = NULL, +}; diff --git a/drivers/char/lrng/lrng_es_krng.h b/drivers/char/lrng/lrng_es_krng.h new file mode 100644 index 0000000000000..cf982b9eea053 --- /dev/null +++ b/drivers/char/lrng/lrng_es_krng.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_RANDOM_H +#define _LRNG_ES_RANDOM_H + +#include "lrng_es_mgr_cb.h" + +#ifdef CONFIG_LRNG_KERNEL_RNG + +extern struct lrng_es_cb lrng_es_krng; + +#endif /* CONFIG_LRNG_KERNEL_RNG */ + +#endif /* _LRNG_ES_RANDOM_H */ diff --git a/drivers/char/lrng/lrng_es_mgr.c b/drivers/char/lrng/lrng_es_mgr.c new file mode 100644 index 0000000000000..050f96bfafab4 --- /dev/null +++ b/drivers/char/lrng/lrng_es_mgr.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Entropy sources management + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_drng_mgr.h" +#include "lrng_es_aux.h" +#include "lrng_es_cpu.h" +#include "lrng_es_irq.h" +#include "lrng_es_jent.h" +#include "lrng_es_krng.h" +#include "lrng_es_mgr.h" +#include "lrng_es_sched.h" +#include "lrng_interface_dev_common.h" +#include 
"lrng_interface_random_kernel.h" + +struct lrng_state { + bool can_invalidate; /* Can invalidate batched entropy? */ + bool perform_seedwork; /* Can seed work be performed? */ + bool lrng_operational; /* Is DRNG operational? */ + bool lrng_fully_seeded; /* Is DRNG fully seeded? */ + bool lrng_min_seeded; /* Is DRNG minimally seeded? */ + bool all_online_numa_node_seeded;/* All NUMA DRNGs seeded? */ + + /* + * To ensure that external entropy providers cannot dominate the + * internal noise sources but yet cannot be dominated by internal + * noise sources, the following booleans are intended to allow + * external to provide seed once when a DRNG reseed occurs. This + * triggering of external noise source is performed even when the + * entropy pool has sufficient entropy. + */ + + atomic_t boot_entropy_thresh; /* Reseed threshold */ + atomic_t reseed_in_progress; /* Flag for on executing reseed */ + struct work_struct lrng_seed_work; /* (re)seed work queue */ +}; + +static struct lrng_state lrng_state = { + false, false, false, false, false, false, + .boot_entropy_thresh = ATOMIC_INIT(LRNG_INIT_ENTROPY_BITS), + .reseed_in_progress = ATOMIC_INIT(0), +}; + +/* + * If the entropy count falls under this number of bits, then we + * should wake up processes which are selecting or polling on write + * access to /dev/random. + */ +u32 lrng_write_wakeup_bits = (LRNG_WRITE_WAKEUP_ENTROPY << 3); + +/* + * The entries must be in the same order as defined by enum lrng_internal_es and + * enum lrng_external_es + */ +struct lrng_es_cb *lrng_es[] = { +#ifdef CONFIG_LRNG_IRQ + &lrng_es_irq, +#endif +#ifdef CONFIG_LRNG_SCHED + &lrng_es_sched, +#endif +#ifdef CONFIG_LRNG_JENT + &lrng_es_jent, +#endif +#ifdef CONFIG_LRNG_CPU + &lrng_es_cpu, +#endif +#ifdef CONFIG_LRNG_KERNEL_RNG + &lrng_es_krng, +#endif + &lrng_es_aux +}; + +/********************************** Helper ***********************************/ + +void lrng_debug_report_seedlevel(const char *name) +{ +#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM + static void *previous = NULL; + void *caller = (void *) _RET_IP_; + + if (READ_ONCE(previous) == caller) + return; + + if (!lrng_state_min_seeded()) + pr_notice("%pS %s called without reaching minimally seeded level (available entropy %u)\n", + caller, name, lrng_avail_entropy()); + + WRITE_ONCE(previous, caller); +#endif +} + +/* + * Reading of the LRNG pool is only allowed by one caller. The reading is + * only performed to (re)seed DRNGs. Thus, if this "lock" is already taken, + * the reseeding operation is in progress. The caller is not intended to wait + * but continue with its other operation. + */ +int lrng_pool_trylock(void) +{ + return atomic_cmpxchg(&lrng_state.reseed_in_progress, 0, 1); +} + +void lrng_pool_unlock(void) +{ + atomic_set(&lrng_state.reseed_in_progress, 0); +} + +/* Set new entropy threshold for reseeding during boot */ +void lrng_set_entropy_thresh(u32 new_entropy_bits) +{ + atomic_set(&lrng_state.boot_entropy_thresh, new_entropy_bits); +} + +/* + * Reset LRNG state - the entropy counters are reset, but the data that may + * or may not have entropy remains in the pools as this data will not hurt. 
+ */ +void lrng_reset_state(void) +{ + u32 i; + + for_each_lrng_es(i) { + if (lrng_es[i]->reset) + lrng_es[i]->reset(); + } + lrng_state.lrng_operational = false; + lrng_state.lrng_fully_seeded = false; + lrng_state.lrng_min_seeded = false; + lrng_state.all_online_numa_node_seeded = false; + pr_debug("reset LRNG\n"); +} + +/* Set flag that all DRNGs are fully seeded */ +void lrng_pool_all_numa_nodes_seeded(bool set) +{ + lrng_state.all_online_numa_node_seeded = set; +} + +/* Return boolean whether LRNG reached the minimally seeded level */ +bool lrng_state_min_seeded(void) +{ + return lrng_state.lrng_min_seeded; +} + +/* Return boolean whether LRNG reached the fully seeded level */ +bool lrng_state_fully_seeded(void) +{ + return lrng_state.lrng_fully_seeded; +} + +/* Return boolean whether LRNG is considered fully operational */ +bool lrng_state_operational(void) +{ + return lrng_state.lrng_operational; +} + +static void lrng_init_wakeup(void) +{ + wake_up_all(&lrng_init_wait); + lrng_init_wakeup_dev(); +} + +static bool lrng_fully_seeded(bool fully_seeded, u32 collected_entropy) +{ + return (collected_entropy >= lrng_get_seed_entropy_osr(fully_seeded)); +} + +/* Policy to check whether the entropy buffer contains sufficient entropy for full seeding */ +bool lrng_fully_seeded_eb(bool fully_seeded, struct entropy_buf *eb) +{ + u32 i, collected_entropy = 0; + + for_each_lrng_es(i) + collected_entropy += eb->e_bits[i]; + + return lrng_fully_seeded(fully_seeded, collected_entropy); +} + +/* Mark one DRNG as not fully seeded */ +void lrng_unset_fully_seeded(struct lrng_drng *drng) +{ + drng->fully_seeded = false; + lrng_pool_all_numa_nodes_seeded(false); + + /* + * The init DRNG instance must always be fully seeded as this instance + * is the fall-back if any of the per-NUMA node DRNG instances is + * insufficiently seeded. Thus, we mark the entire LRNG as + * non-operational if the initial DRNG is no longer fully seeded. + */ + if (drng == lrng_drng_init_instance() && lrng_state_operational()) { + pr_debug("LRNG set to non-operational\n"); + lrng_state.lrng_operational = false; + lrng_state.lrng_fully_seeded = false; + + /* If sufficient entropy is available, reseed now. */ + lrng_es_add_entropy(); + } +} + +/* Policy to enable LRNG operational mode */ +static void lrng_set_operational(void) +{ + /* + * LRNG is operational if the initial DRNG is fully seeded. This state + * can only occur if either the external entropy sources provided + * sufficient entropy, or the SP800-90B startup test completed for + * the internal ES so that they also supply entropy data. + */ + if (lrng_state.lrng_fully_seeded) { + lrng_state.lrng_operational = true; + lrng_process_ready_list(); + lrng_init_wakeup(); + pr_info("LRNG fully operational\n"); + } +} + +static u32 lrng_avail_entropy_thresh(void) +{ + u32 ent_thresh = lrng_security_strength(); + + /* + * Apply oversampling during initialization according to SP800-90C as + * we request a larger buffer from the ES. + */ + if (lrng_sp80090c_compliant() && + !lrng_state.all_online_numa_node_seeded) + ent_thresh += LRNG_SEED_BUFFER_INIT_ADD_BITS; + + return ent_thresh; +} + +/* Available entropy in the entire LRNG considering all entropy sources */ +u32 lrng_avail_entropy(void) +{ + u32 i, ent = 0, ent_thresh = lrng_avail_entropy_thresh(); + + BUILD_BUG_ON(ARRAY_SIZE(lrng_es) != lrng_ext_es_last); + for_each_lrng_es(i) + ent += lrng_es[i]->curr_entropy(ent_thresh); + return ent; +} + +/* + * lrng_init_ops() - Set seed stages of LRNG + * + * Set the slow noise source reseed trigger threshold.
The initial threshold + * is set to the minimum data size that can be read from the pool: a word. Upon + * reaching this value, the next seed threshold of 128 bits is set followed + * by 256 bits. + * + * @eb: buffer containing the size of entropy currently injected into DRNG - if + * NULL, the function obtains the available entropy from the ES. + */ +void lrng_init_ops(struct entropy_buf *eb) +{ + struct lrng_state *state = &lrng_state; + u32 i, requested_bits, seed_bits = 0; + + if (state->lrng_operational) + return; + + requested_bits = lrng_get_seed_entropy_osr( + state->all_online_numa_node_seeded); + + if (eb) { + for_each_lrng_es(i) + seed_bits += eb->e_bits[i]; + } else { + u32 ent_thresh = lrng_avail_entropy_thresh(); + + for_each_lrng_es(i) + seed_bits += lrng_es[i]->curr_entropy(ent_thresh); + } + + /* DRNG is seeded with full security strength */ + if (state->lrng_fully_seeded) { + lrng_set_operational(); + lrng_set_entropy_thresh(requested_bits); + } else if (lrng_fully_seeded(state->all_online_numa_node_seeded, + seed_bits)) { + if (state->can_invalidate) + invalidate_batched_entropy(); + + state->lrng_fully_seeded = true; + lrng_set_operational(); + state->lrng_min_seeded = true; + pr_info("LRNG fully seeded with %u bits of entropy\n", + seed_bits); + lrng_set_entropy_thresh(requested_bits); + } else if (!state->lrng_min_seeded) { + + /* DRNG is seeded with at least 128 bits of entropy */ + if (seed_bits >= LRNG_MIN_SEED_ENTROPY_BITS) { + if (state->can_invalidate) + invalidate_batched_entropy(); + + state->lrng_min_seeded = true; + pr_info("LRNG minimally seeded with %u bits of entropy\n", + seed_bits); + lrng_set_entropy_thresh(requested_bits); + lrng_init_wakeup(); + + /* DRNG is seeded with at least LRNG_INIT_ENTROPY_BITS bits */ + } else if (seed_bits >= LRNG_INIT_ENTROPY_BITS) { + pr_info("LRNG initial entropy level %u bits of entropy\n", + seed_bits); + lrng_set_entropy_thresh(LRNG_MIN_SEED_ENTROPY_BITS); + } + } +} + +int __init lrng_rand_initialize(void) +{ + struct seed { + ktime_t time; + unsigned long data[(LRNG_MAX_DIGESTSIZE / + sizeof(unsigned long))]; + struct new_utsname utsname; + } seed __aligned(LRNG_KCAPI_ALIGN); + unsigned int i; + + BUILD_BUG_ON(LRNG_MAX_DIGESTSIZE % sizeof(unsigned long)); + + seed.time = ktime_get_real(); + + for (i = 0; i < ARRAY_SIZE(seed.data); i++) { +#ifdef CONFIG_LRNG_RANDOM_IF + if (!arch_get_random_seed_long_early(&(seed.data[i])) && + !arch_get_random_long_early(&seed.data[i])) +#else + if (!arch_get_random_seed_long(&(seed.data[i])) && + !arch_get_random_long(&seed.data[i])) +#endif + seed.data[i] = random_get_entropy(); + } + memcpy(&seed.utsname, utsname(), sizeof(*(utsname()))); + + lrng_pool_insert_aux((u8 *)&seed, sizeof(seed), 0); + memzero_explicit(&seed, sizeof(seed)); + + /* Initialize the seed work queue */ + INIT_WORK(&lrng_state.lrng_seed_work, lrng_drng_seed_work); + lrng_state.perform_seedwork = true; + + invalidate_batched_entropy(); + + lrng_state.can_invalidate = true; + + return 0; +} + +#ifndef CONFIG_LRNG_RANDOM_IF +early_initcall(lrng_rand_initialize); +#endif + +/* Interface requesting a reseed of the DRNG */ +void lrng_es_add_entropy(void) +{ + /* + * Once all DRNGs are fully seeded, the system-triggered arrival of + * entropy will not cause any reseeding any more. + */ + if (likely(lrng_state.all_online_numa_node_seeded)) + return; + + /* Only trigger the DRNG reseed if we have collected entropy. 
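+	 * The threshold starts at LRNG_INIT_ENTROPY_BITS, is raised by
+	 * lrng_init_ops() to LRNG_MIN_SEED_ENTROPY_BITS once that level is
+	 * reached, and to the full (possibly oversampled) seed requirement
+	 * thereafter.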
*/ + if (lrng_avail_entropy() < + atomic_read_u32(&lrng_state.boot_entropy_thresh)) + return; + + /* Ensure that the seeding only occurs once at any given time. */ + if (lrng_pool_trylock()) + return; + + /* Seed the DRNG with any available noise. */ + if (lrng_state.perform_seedwork) + schedule_work(&lrng_state.lrng_seed_work); + else + lrng_drng_seed_work(NULL); +} + +/* Fill the seed buffer with data from the noise sources */ +void lrng_fill_seed_buffer(struct entropy_buf *eb, u32 requested_bits) +{ + struct lrng_state *state = &lrng_state; + u32 i, req_ent = lrng_sp80090c_compliant() ? + lrng_security_strength() : LRNG_MIN_SEED_ENTROPY_BITS; + + /* Guarantee that requested bits is a multiple of bytes */ + BUILD_BUG_ON(LRNG_DRNG_SECURITY_STRENGTH_BITS % 8); + + /* always reseed the DRNG with the current time stamp */ + eb->now = random_get_entropy(); + + /* + * Require at least 128 bits of entropy for any reseed. If the LRNG is + * operated SP800-90C compliant we want to comply with SP800-90A section + * 9.2 mandating that DRNG is reseeded with the security strength. + */ + if (state->lrng_fully_seeded && (lrng_avail_entropy() < req_ent)) { + for_each_lrng_es(i) + eb->e_bits[i] = 0; + + goto wakeup; + } + + /* Concatenate the output of the entropy sources. */ + for_each_lrng_es(i) { + lrng_es[i]->get_ent(eb, requested_bits, + state->lrng_fully_seeded); + } + + /* allow external entropy provider to provide seed */ + lrng_state_exseed_allow_all(); + +wakeup: + lrng_writer_wakeup(); +} diff --git a/drivers/char/lrng/lrng_es_mgr.h b/drivers/char/lrng/lrng_es_mgr.h new file mode 100644 index 0000000000000..e24f101de5a0c --- /dev/null +++ b/drivers/char/lrng/lrng_es_mgr.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_MGR_H +#define _LRNG_ES_MGR_H + +#include "lrng_es_mgr_cb.h" + +/*************************** General LRNG parameter ***************************/ + +#define LRNG_DRNG_BLOCKSIZE 64 /* Maximum of DRNG block sizes */ + +/* Helper to concatenate a macro with an integer type */ +#define LRNG_PASTER(x, y) x ## y +#define LRNG_UINT32_C(x) LRNG_PASTER(x, U) + +/************************* Entropy sources management *************************/ + +extern struct lrng_es_cb *lrng_es[]; + +#define for_each_lrng_es(ctr) \ + for ((ctr) = 0; (ctr) < lrng_ext_es_last; (ctr)++) + +bool lrng_state_min_seeded(void); +void lrng_debug_report_seedlevel(const char *name); +int lrng_rand_initialize(void); +bool lrng_state_operational(void); + +extern u32 lrng_write_wakeup_bits; +void lrng_set_entropy_thresh(u32 new); +u32 lrng_avail_entropy(void); +void lrng_reset_state(void); + +bool lrng_state_fully_seeded(void); + +int lrng_pool_trylock(void); +void lrng_pool_unlock(void); +void lrng_pool_all_numa_nodes_seeded(bool set); + +bool lrng_fully_seeded_eb(bool fully_seeded, struct entropy_buf *eb); +void lrng_unset_fully_seeded(struct lrng_drng *drng); +void lrng_fill_seed_buffer(struct entropy_buf *eb, u32 requested_bits); +void lrng_init_ops(struct entropy_buf *eb); + +#endif /* _LRNG_ES_MGR_H */ diff --git a/drivers/char/lrng/lrng_es_mgr_cb.h b/drivers/char/lrng/lrng_es_mgr_cb.h new file mode 100644 index 0000000000000..08b24e1b7766e --- /dev/null +++ b/drivers/char/lrng/lrng_es_mgr_cb.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + * + * Definition of an entropy source. 
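+ *
+ * Each entropy source provides one struct lrng_es_cb instance (defined below)
+ * and is referenced from the lrng_es[] array in lrng_es_mgr.c in the order
+ * given by enum lrng_internal_es and enum lrng_external_es.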
+ */ + +#ifndef _LRNG_ES_MGR_CB_H +#define _LRNG_ES_MGR_CB_H + +#include + +#include "lrng_definitions.h" +#include "lrng_drng_mgr.h" + +enum lrng_internal_es { +#ifdef CONFIG_LRNG_IRQ + lrng_int_es_irq, /* IRQ-based entropy source */ +#endif +#ifdef CONFIG_LRNG_SCHED + lrng_int_es_sched, /* Scheduler entropy source */ +#endif + lrng_int_es_last, /* MUST be the last entry */ +}; + +enum lrng_external_es { + lrng_ext_link = lrng_int_es_last - 1, /* Link entry */ +#ifdef CONFIG_LRNG_JENT + lrng_ext_es_jitter, /* Jitter RNG */ +#endif +#ifdef CONFIG_LRNG_CPU + lrng_ext_es_cpu, /* CPU-based, e.g. RDSEED */ +#endif +#ifdef CONFIG_LRNG_KERNEL_RNG + lrng_ext_es_krng, /* random.c */ +#endif + lrng_ext_es_aux, /* MUST BE LAST ES! */ + lrng_ext_es_last /* MUST be the last entry */ +}; + +struct entropy_buf { + u8 e[lrng_ext_es_last][LRNG_DRNG_INIT_SEED_SIZE_BYTES]; + u32 now, e_bits[lrng_ext_es_last]; +}; + +/* + * struct lrng_es_cb - callback defining an entropy source + * @name: Name of the entropy source. + * @get_ent: Fetch entropy into the entropy_buf. The ES shall only deliver + * data if its internal initialization is complete, including any + * SP800-90B startup testing or similar. + * @curr_entropy: Return amount of currently available entropy. + * @max_entropy: Maximum amount of entropy the entropy source is able to + * maintain. + * @state: Buffer with human-readable ES state. + * @reset: Reset entropy source (drop all entropy and reinitialize). + * This callback may be NULL. + * @switch_hash: callback to switch from an old hash callback definition to + * a new one. This callback may be NULL. + */ +struct lrng_es_cb { + const char *name; + void (*get_ent)(struct entropy_buf *eb, u32 requested_bits, + bool fully_seeded); + u32 (*curr_entropy)(u32 requested_bits); + u32 (*max_entropy)(void); + void (*state)(unsigned char *buf, size_t buflen); + void (*reset)(void); + int (*switch_hash)(struct lrng_drng *drng, int node, + const struct lrng_hash_cb *new_cb, void *new_hash, + const struct lrng_hash_cb *old_cb); +}; + +/* Allow entropy sources to tell the ES manager that new entropy is there */ +void lrng_es_add_entropy(void); + +/* Cap to maximum entropy that can ever be generated with given hash */ +#define lrng_cap_requested(__digestsize_bits, __requested_bits) \ + do { \ + if (__digestsize_bits < __requested_bits) { \ + pr_debug("Cannot satisfy requested entropy %u due to insufficient hash size %u\n",\ + __requested_bits, __digestsize_bits); \ + __requested_bits = __digestsize_bits; \ + } \ + } while (0) + +#endif /* _LRNG_ES_MGR_CB_H */ diff --git a/drivers/char/lrng/lrng_es_sched.c b/drivers/char/lrng/lrng_es_sched.c new file mode 100644 index 0000000000000..f7e85c381e226 --- /dev/null +++ b/drivers/char/lrng/lrng_es_sched.c @@ -0,0 +1,406 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Scheduler-based data collection + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "lrng_es_aux.h" +#include "lrng_es_sched.h" +#include "lrng_es_timer_common.h" +#include "lrng_health.h" +#include "lrng_numa.h" +#include "lrng_testing.h" + +/* + * Number of scheduler-based context switches to be recorded to assume that + * DRNG security strength bits of entropy are received. 
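+ * For illustration, assuming a (hypothetical) rate of 4096 context switches
+ * per 256 bits: 1024 recorded events would be credited
+ * 1024 * 256 / 4096 = 64 bits of entropy by lrng_data_to_entropy(), and
+ * lrng_entropy_to_data(256, 4096) would require 4096 events.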
+ * Note: a value below the DRNG security strength should not be defined as this + * may imply the DRNG can never be fully seeded in case other noise + * sources are unavailable. + */ +#define LRNG_SCHED_ENTROPY_BITS \ + LRNG_UINT32_C(CONFIG_LRNG_SCHED_ENTROPY_RATE) + +/* Number of events required for LRNG_DRNG_SECURITY_STRENGTH_BITS entropy */ +static u32 lrng_sched_entropy_bits = LRNG_SCHED_ENTROPY_BITS; + +static u32 sched_entropy __read_mostly = LRNG_SCHED_ENTROPY_BITS; +#ifdef CONFIG_LRNG_RUNTIME_ES_CONFIG +module_param(sched_entropy, uint, 0444); +MODULE_PARM_DESC(sched_entropy, + "How many scheduler-based context switches must be collected for obtaining 256 bits of entropy\n"); +#endif + +/* Per-CPU array holding concatenated entropy events */ +static DEFINE_PER_CPU(u32 [LRNG_DATA_ARRAY_SIZE], lrng_sched_array) + __aligned(LRNG_KCAPI_ALIGN); +static DEFINE_PER_CPU(u32, lrng_sched_array_ptr) = 0; +static DEFINE_PER_CPU(atomic_t, lrng_sched_array_events) = ATOMIC_INIT(0); + +static void __init lrng_sched_check_compression_state(void) +{ + /* One pool should hold sufficient entropy for disabled compression */ + u32 max_ent = min_t(u32, lrng_get_digestsize(), + lrng_data_to_entropy(LRNG_DATA_NUM_VALUES, + lrng_sched_entropy_bits)); + if (max_ent < lrng_security_strength()) { + pr_devel("Scheduler entropy source will never provide %u bits of entropy required for fully seeding the DRNG all by itself\n", + lrng_security_strength()); + } +} + +void __init lrng_sched_es_init(bool highres_timer) +{ + /* Set a minimum number of scheduler events that must be collected */ + sched_entropy = max_t(u32, LRNG_SCHED_ENTROPY_BITS, sched_entropy); + + if (highres_timer) { + lrng_sched_entropy_bits = sched_entropy; + } else { + u32 new_entropy = sched_entropy * LRNG_ES_OVERSAMPLING_FACTOR; + + lrng_sched_entropy_bits = (sched_entropy < new_entropy) ? + new_entropy : sched_entropy; + pr_warn("operating without high-resolution timer and applying oversampling factor %u\n", + LRNG_ES_OVERSAMPLING_FACTOR); + } + + lrng_sched_check_compression_state(); +} + +static u32 lrng_sched_avail_pool_size(void) +{ + u32 max_pool = lrng_get_digestsize(), + max_size = min_t(u32, max_pool, LRNG_DATA_NUM_VALUES); + int cpu; + + for_each_online_cpu(cpu) + max_size += max_pool; + + return max_size; +} + +/* Return entropy of unused scheduler events present in all per-CPU pools. */ +static u32 lrng_sched_avail_entropy(u32 __unused) +{ + u32 digestsize_events, events = 0; + int cpu; + + /* Only deliver entropy when SP800-90B self test is completed */ + if (!lrng_sp80090b_startup_complete_es(lrng_int_es_sched)) + return 0; + + /* Obtain the cap of maximum numbers of scheduler events we count */ + digestsize_events = lrng_entropy_to_data(lrng_get_digestsize(), + lrng_sched_entropy_bits); + /* Cap to max. number of scheduler events the array can hold */ + digestsize_events = min_t(u32, digestsize_events, LRNG_DATA_NUM_VALUES); + + for_each_online_cpu(cpu) { + events += min_t(u32, digestsize_events, + atomic_read_u32(per_cpu_ptr(&lrng_sched_array_events, + cpu))); + } + + /* Consider oversampling rate */ + return lrng_reduce_by_osr( + lrng_data_to_entropy(events, lrng_sched_entropy_bits)); +} + +/* + * Reset all per-CPU pools - reset entropy estimator but leave the pool data + * that may or may not have entropy unchanged. + */ +static void lrng_sched_reset(void) +{ + int cpu; + + /* Trigger GCD calculation anew. 
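+	 * Setting the GCD to zero causes lrng_gcd_add_value() to collect a
+	 * fresh window of time stamps and derive a new common divisor, e.g. at
+	 * least 8 for a (hypothetical) timer whose readings are always
+	 * multiples of 8, which lrng_sched_time_process() then divides out of
+	 * each time stamp.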
*/ + lrng_gcd_set(0); + + for_each_online_cpu(cpu) + atomic_set(per_cpu_ptr(&lrng_sched_array_events, cpu), 0); +} + +/* + * Hash all per-CPU arrays and return the digest to be used as seed data for + * seeding a DRNG. The caller must guarantee backtracking resistance. + * The function will only copy as much data as entropy is available into the + * caller-provided output buffer. + * + * This function handles the translation from the number of received scheduler + * events into an entropy statement. The conversion depends on + * LRNG_SCHED_ENTROPY_BITS which defines how many scheduler events must be + * received to obtain 256 bits of entropy. With this value, the function + * lrng_data_to_entropy converts a given data size (received scheduler events, + * requested amount of data, etc.) into an entropy statement. + * lrng_entropy_to_data does the reverse. + * + * @eb: entropy buffer to store entropy + * @requested_bits: Requested amount of entropy + * @fully_seeded: indicator whether LRNG is fully seeded + */ +static void lrng_sched_pool_hash(struct entropy_buf *eb, u32 requested_bits, + bool fully_seeded) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_hash_cb *hash_cb; + struct lrng_drng *drng = lrng_drng_node_instance(); + u8 digest[LRNG_MAX_DIGESTSIZE]; + unsigned long flags; + u32 found_events, collected_events = 0, collected_ent_bits, + requested_events, returned_ent_bits, digestsize, digestsize_events; + int ret, cpu; + void *hash; + + /* Only deliver entropy when SP800-90B self test is completed */ + if (!lrng_sp80090b_startup_complete_es(lrng_int_es_sched)) { + eb->e_bits[lrng_int_es_sched] = 0; + return; + } + + /* Lock guarding replacement of per-NUMA hash */ + read_lock_irqsave(&drng->hash_lock, flags); + hash_cb = drng->hash_cb; + hash = drng->hash; + + /* The hash state of filled with all per-CPU pool hashes. */ + ret = hash_cb->hash_init(shash, hash); + if (ret) + goto err; + + digestsize = hash_cb->hash_digestsize(shash); + + /* Cap to maximum entropy that can ever be generated with given hash */ + lrng_cap_requested(digestsize << 3, requested_bits); + requested_events = lrng_entropy_to_data(requested_bits + + lrng_compress_osr(), + lrng_sched_entropy_bits); + digestsize_events = lrng_entropy_to_data(digestsize << 3, + lrng_sched_entropy_bits); + + /* + * Harvest entropy from each per-CPU hash state - even though we may + * have collected sufficient entropy, we will hash all per-CPU pools. 
+ */ + for_each_online_cpu(cpu) { + u32 unused_events = 0; + + ret = hash_cb->hash_update(shash, + (u8 *)per_cpu_ptr(lrng_sched_array, cpu), + LRNG_DATA_ARRAY_SIZE * sizeof(u32)); + + /* Store all not-yet compressed data in data array into hash */ + ret = hash_cb->hash_update(shash, digest, digestsize); + if (ret) + goto err; + + /* Obtain entropy statement like for the entropy pool */ + found_events = atomic_xchg_relaxed( + per_cpu_ptr(&lrng_sched_array_events, cpu), 0); + /* Cap to maximum amount of data we can hold in hash */ + found_events = min_t(u32, found_events, digestsize_events); + /* Cap to maximum amount of data we can hold in array */ + found_events = min_t(u32, found_events, LRNG_DATA_NUM_VALUES); + + collected_events += found_events; + if (collected_events > requested_events) { + unused_events = collected_events - requested_events; + atomic_add_return_relaxed(unused_events, + per_cpu_ptr(&lrng_sched_array_events, cpu)); + collected_events = requested_events; + } + pr_debug("%u scheduler-based events used from entropy array of CPU %d, %u scheduler-based events remain unused\n", + found_events - unused_events, cpu, unused_events); + } + + ret = hash_cb->hash_final(shash, digest); + if (ret) + goto err; + + collected_ent_bits = lrng_data_to_entropy(collected_events, + lrng_sched_entropy_bits); + /* Apply oversampling: discount requested oversampling rate */ + returned_ent_bits = lrng_reduce_by_osr(collected_ent_bits); + + pr_debug("obtained %u bits by collecting %u bits of entropy from scheduler-based noise source\n", + returned_ent_bits, collected_ent_bits); + + /* + * Truncate to available entropy as implicitly allowed by SP800-90B + * section 3.1.5.1.1 table 1 which awards truncated hashes full + * entropy. + * + * During boot time, we read requested_bits data with + * returned_ent_bits entropy. In case our conservative entropy + * estimate underestimates the available entropy we can transport as + * much available entropy as possible. + */ + memcpy(eb->e[lrng_int_es_sched], digest, + fully_seeded ? returned_ent_bits >> 3 : requested_bits >> 3); + eb->e_bits[lrng_int_es_sched] = returned_ent_bits; + +out: + hash_cb->hash_desc_zero(shash); + read_unlock_irqrestore(&drng->hash_lock, flags); + memzero_explicit(digest, sizeof(digest)); + return; + +err: + eb->e_bits[lrng_int_es_sched] = 0; + goto out; +} + +/* + * Concatenate full 32 bit word at the end of time array even when current + * ptr is not aligned to sizeof(data). + */ +static void lrng_sched_array_add_u32(u32 data) +{ + /* Increment pointer by number of slots taken for input value */ + u32 pre_ptr, mask, ptr = this_cpu_add_return(lrng_sched_array_ptr, + LRNG_DATA_SLOTS_PER_UINT); + unsigned int pre_array; + + lrng_data_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_sched_array[pre_array], ~(0xffffffff & ~mask)); + this_cpu_or(lrng_sched_array[pre_array], data & ~mask); + + /* + * Continuous compression is not allowed for scheduler noise source, + * so do not call lrng_sched_array_to_hash here. 
+ */ + + /* LSB of data go into current unit */ + this_cpu_write(lrng_sched_array[lrng_data_idx2array(ptr)], + data & mask); +} + +/* Concatenate data of max LRNG_DATA_SLOTSIZE_MASK at the end of time array */ +static void lrng_sched_array_add_slot(u32 data) +{ + /* Get slot */ + u32 ptr = this_cpu_inc_return(lrng_sched_array_ptr) & + LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + /* zeroization of slot to ensure the following OR adds the data */ + this_cpu_and(lrng_sched_array[array], + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot))); + /* Store data into slot */ + this_cpu_or(lrng_sched_array[array], lrng_data_slot_val(data, slot)); + + /* + * Continuous compression is not allowed for scheduler noise source, + * so do not call lrng_sched_array_to_hash here. + */ +} + +static void +lrng_time_process_common(u32 time, void(*add_time)(u32 data)) +{ + enum lrng_health_res health_test; + + if (lrng_raw_sched_hires_entropy_store(time)) + return; + + health_test = lrng_health_test(time, lrng_int_es_sched); + if (health_test > lrng_health_fail_use) + return; + + if (health_test == lrng_health_pass) + atomic_inc_return(this_cpu_ptr(&lrng_sched_array_events)); + + add_time(time); + + /* + * We cannot call lrng_es_add_entropy() as this would call a schedule + * operation that is not permissible in scheduler context. + * As the scheduler ES provides a high bandwidth of entropy, we assume + * that other reseed triggers happen to pick up the scheduler ES + * entropy in due time. + */ +} + +/* Batching up of entropy in per-CPU array */ +static void lrng_sched_time_process(void) +{ + u32 now_time = random_get_entropy(); + + if (unlikely(!lrng_gcd_tested())) { + /* When GCD is unknown, we process the full time stamp */ + lrng_time_process_common(now_time, lrng_sched_array_add_u32); + lrng_gcd_add_value(now_time); + } else { + /* GCD is known and applied */ + lrng_time_process_common((now_time / lrng_gcd_get()) & + LRNG_DATA_SLOTSIZE_MASK, + lrng_sched_array_add_slot); + } + + lrng_sched_perf_time(now_time); +} + +void add_sched_randomness(const struct task_struct *p, int cpu) +{ + if (lrng_highres_timer()) { + lrng_sched_time_process(); + } else { + u32 tmp = cpu; + + tmp ^= lrng_raw_sched_pid_entropy_store(p->pid) ? + 0 : (u32)p->pid; + tmp ^= lrng_raw_sched_starttime_entropy_store(p->start_time) ? + 0 : (u32)p->start_time; + tmp ^= lrng_raw_sched_nvcsw_entropy_store(p->nvcsw) ? + 0 : (u32)p->nvcsw; + + lrng_sched_time_process(); + lrng_sched_array_add_u32(tmp); + } +} +EXPORT_SYMBOL(add_sched_randomness); + +static void lrng_sched_es_state(unsigned char *buf, size_t buflen) +{ + const struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + + /* Assume the lrng_drng_init lock is taken by caller */ + snprintf(buf, buflen, + " Hash for operating entropy pool: %s\n" + " Available entropy: %u\n" + " per-CPU scheduler event collection size: %u\n" + " Standards compliance: %s\n" + " High-resolution timer: %s\n", + lrng_drng_init->hash_cb->hash_name(), + lrng_sched_avail_entropy(0), + LRNG_DATA_NUM_VALUES, + lrng_sp80090b_compliant(lrng_int_es_sched) ? "SP800-90B " : "", + lrng_highres_timer() ? 
"true" : "false"); +} + +struct lrng_es_cb lrng_es_sched = { + .name = "Scheduler", + .get_ent = lrng_sched_pool_hash, + .curr_entropy = lrng_sched_avail_entropy, + .max_entropy = lrng_sched_avail_pool_size, + .state = lrng_sched_es_state, + .reset = lrng_sched_reset, + .switch_hash = NULL, +}; diff --git a/drivers/char/lrng/lrng_es_sched.h b/drivers/char/lrng/lrng_es_sched.h new file mode 100644 index 0000000000000..f1e596dd89d9f --- /dev/null +++ b/drivers/char/lrng/lrng_es_sched.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_SCHED_H +#define _LRNG_ES_SCHED_H + +#include "lrng_es_mgr_cb.h" + +#ifdef CONFIG_LRNG_SCHED +void lrng_sched_es_init(bool highres_timer); + +extern struct lrng_es_cb lrng_es_sched; + +#else /* CONFIG_LRNG_SCHED */ +static inline void lrng_sched_es_init(bool highres_timer) { } +#endif /* CONFIG_LRNG_SCHED */ + +#endif /* _LRNG_ES_SCHED_H */ diff --git a/drivers/char/lrng/lrng_es_timer_common.c b/drivers/char/lrng/lrng_es_timer_common.c new file mode 100644 index 0000000000000..70f3ff074fe6e --- /dev/null +++ b/drivers/char/lrng/lrng_es_timer_common.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Slow Entropy Source: Interrupt data collection + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_es_irq.h" +#include "lrng_es_sched.h" +#include "lrng_es_timer_common.h" +#include "lrng_health.h" + +/* Is high-resolution timer present? */ +static bool lrng_highres_timer_val = false; + +/* Number of time stamps analyzed to calculate a GCD */ +#define LRNG_GCD_WINDOW_SIZE 100 +static u32 lrng_gcd_history[LRNG_GCD_WINDOW_SIZE]; +static atomic_t lrng_gcd_history_ptr = ATOMIC_INIT(-1); + +/* The common divisor for all timestamps */ +static u32 lrng_gcd_timer = 0; + +bool lrng_gcd_tested(void) +{ + return (lrng_gcd_timer != 0); +} + +u32 lrng_gcd_get(void) +{ + return lrng_gcd_timer; +} + +/* Set the GCD for use in IRQ ES - if 0, the GCD calculation is restarted. */ +void lrng_gcd_set(u32 running_gcd) +{ + lrng_gcd_timer = running_gcd; + /* Ensure that update to global variable lrng_gcd_timer is visible */ + mb(); +} + +static void lrng_gcd_set_check(u32 running_gcd) +{ + if (!lrng_gcd_tested()) { + lrng_gcd_set(running_gcd); + pr_debug("Setting GCD to %u\n", running_gcd); + } +} + +u32 lrng_gcd_analyze(u32 *history, size_t nelem) +{ + u32 running_gcd = 0; + size_t i; + + /* Now perform the analysis on the accumulated time data. */ + for (i = 0; i < nelem; i++) { + /* + * NOTE: this would be the place to add more analysis on the + * appropriateness of the timer like checking the presence + * of sufficient variations in the timer. + */ + + /* + * This calculates the gcd of all the time values. that is + * gcd(time_1, time_2, ..., time_nelem) + * + * Some timers increment by a fixed (non-1) amount each step. + * This code checks for such increments, and allows the library + * to output the number of such changes have occurred. 
+ */ + running_gcd = (u32)gcd(history[i], running_gcd); + + /* Zeroize data */ + history[i] = 0; + } + + return running_gcd; +} + +void lrng_gcd_add_value(u32 time) +{ + u32 ptr = (u32)atomic_inc_return_relaxed(&lrng_gcd_history_ptr); + + if (ptr < LRNG_GCD_WINDOW_SIZE) { + lrng_gcd_history[ptr] = time; + } else if (ptr == LRNG_GCD_WINDOW_SIZE) { + u32 gcd = lrng_gcd_analyze(lrng_gcd_history, + LRNG_GCD_WINDOW_SIZE); + + if (!gcd) + gcd = 1; + + /* + * Ensure that we have variations in the time stamp below the + * given value. This is just a safety measure to prevent the GCD + * becoming too large. + */ + if (gcd >= 1000) { + pr_warn("calculated GCD is larger than expected: %u\n", + gcd); + gcd = 1000; + } + + /* Adjust all deltas by the observed (small) common factor. */ + lrng_gcd_set_check(gcd); + atomic_set(&lrng_gcd_history_ptr, 0); + } +} + +/* Return boolean whether LRNG identified presence of high-resolution timer */ +bool lrng_highres_timer(void) +{ + return lrng_highres_timer_val; +} + +static int __init lrng_init_time_source(void) +{ + if ((random_get_entropy() & LRNG_DATA_SLOTSIZE_MASK) || + (random_get_entropy() & LRNG_DATA_SLOTSIZE_MASK)) { + /* + * As the highres timer is identified here, previous interrupts + * obtained during boot time are treated like a lowres-timer + * would have been present. + */ + lrng_highres_timer_val = true; + } else { + lrng_health_disable(); + lrng_highres_timer_val = false; + } + + lrng_irq_es_init(lrng_highres_timer_val); + lrng_sched_es_init(lrng_highres_timer_val); + + /* Ensure that changes to global variables are visible */ + mb(); + + return 0; +} +core_initcall(lrng_init_time_source); diff --git a/drivers/char/lrng/lrng_es_timer_common.h b/drivers/char/lrng/lrng_es_timer_common.h new file mode 100644 index 0000000000000..b45b9f683dea1 --- /dev/null +++ b/drivers/char/lrng/lrng_es_timer_common.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG Slow Noise Source: Time stamp array handling + * + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_ES_TIMER_COMMON_H +#define _LRNG_ES_TIMER_COMMON_H + +bool lrng_gcd_tested(void); +void lrng_gcd_set(u32 running_gcd); +u32 lrng_gcd_get(void); +u32 lrng_gcd_analyze(u32 *history, size_t nelem); +void lrng_gcd_add_value(u32 time); +bool lrng_highres_timer(void); + +/* + * To limit the impact on the interrupt handling, the LRNG concatenates + * entropic LSB parts of the time stamps in a per-CPU array and only + * injects them into the entropy pool when the array is full. + */ + +/* Store multiple integers in one u32 */ +#define LRNG_DATA_SLOTSIZE_BITS (8) +#define LRNG_DATA_SLOTSIZE_MASK ((1 << LRNG_DATA_SLOTSIZE_BITS) - 1) +#define LRNG_DATA_ARRAY_MEMBER_BITS (4 << 3) /* ((sizeof(u32)) << 3) */ +#define LRNG_DATA_SLOTS_PER_UINT (LRNG_DATA_ARRAY_MEMBER_BITS / \ + LRNG_DATA_SLOTSIZE_BITS) + +/* + * Number of time values to store in the array - in small environments + * only one atomic_t variable per CPU is used. 
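+ *
+ * Worked example (CONFIG_LRNG_COLLECTION_SIZE of 1024 is hypothetical): with
+ * 8-bit slots in u32 array members, LRNG_DATA_SLOTS_PER_UINT is 4, so
+ * LRNG_DATA_NUM_VALUES is 1024 and LRNG_DATA_ARRAY_SIZE is 256. Time stamp
+ * index 9 then lands in array word lrng_data_idx2array(9) == 2 at slot
+ * lrng_data_idx2slot(9) == 1, i.e. bits 8..15 of that word.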
+ */ +#define LRNG_DATA_NUM_VALUES (CONFIG_LRNG_COLLECTION_SIZE) +/* Mask of LSB of time stamp to store */ +#define LRNG_DATA_WORD_MASK (LRNG_DATA_NUM_VALUES - 1) + +#define LRNG_DATA_SLOTS_MASK (LRNG_DATA_SLOTS_PER_UINT - 1) +#define LRNG_DATA_ARRAY_SIZE (LRNG_DATA_NUM_VALUES / \ + LRNG_DATA_SLOTS_PER_UINT) + +/* Starting bit index of slot */ +static inline unsigned int lrng_data_slot2bitindex(unsigned int slot) +{ + return (LRNG_DATA_SLOTSIZE_BITS * slot); +} + +/* Convert index into the array index */ +static inline unsigned int lrng_data_idx2array(unsigned int idx) +{ + return idx / LRNG_DATA_SLOTS_PER_UINT; +} + +/* Convert index into the slot of a given array index */ +static inline unsigned int lrng_data_idx2slot(unsigned int idx) +{ + return idx & LRNG_DATA_SLOTS_MASK; +} + +/* Convert value into slot value */ +static inline unsigned int lrng_data_slot_val(unsigned int val, + unsigned int slot) +{ + return val << lrng_data_slot2bitindex(slot); +} + +/* + * Return the pointers for the previous and current units to inject a u32 into. + * Also return the mask which the u32 word is to be processed. + */ +static inline void lrng_data_split_u32(u32 *ptr, u32 *pre_ptr, u32 *mask) +{ + /* ptr to previous unit */ + *pre_ptr = (*ptr - LRNG_DATA_SLOTS_PER_UINT) & LRNG_DATA_WORD_MASK; + *ptr &= LRNG_DATA_WORD_MASK; + + /* mask to split data into the two parts for the two units */ + *mask = ((1 << (*pre_ptr & (LRNG_DATA_SLOTS_PER_UINT - 1)) * + LRNG_DATA_SLOTSIZE_BITS)) - 1; +} + +#endif /* _LRNG_ES_TIMER_COMMON_H */ \ No newline at end of file diff --git a/drivers/char/lrng/lrng_hash_kcapi.c b/drivers/char/lrng/lrng_hash_kcapi.c new file mode 100644 index 0000000000000..13e62db9b6c8e --- /dev/null +++ b/drivers/char/lrng/lrng_hash_kcapi.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for providing the hash primitive using the kernel crypto API. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + + +static char *lrng_hash_name = "sha512"; + +/* The parameter must be r/o in sysfs as otherwise races appear. 
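+ *
+ * A different hash can still be selected at load time, e.g. (assuming
+ * the module is named lrng_hash_kcapi and the algorithm is known to the
+ * kernel crypto API):
+ *
+ *	modprobe lrng_hash_kcapi lrng_hash_name=sha3-512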
*/ +module_param(lrng_hash_name, charp, 0444); +MODULE_PARM_DESC(lrng_hash_name, "Kernel crypto API hash name"); + +struct lrng_hash_info { + struct crypto_shash *tfm; +}; + +static const char *lrng_kcapi_hash_name(void) +{ + return lrng_hash_name; +} + +static void _lrng_kcapi_hash_free(struct lrng_hash_info *lrng_hash) +{ + struct crypto_shash *tfm = lrng_hash->tfm; + + crypto_free_shash(tfm); + kfree(lrng_hash); +} + +static void *lrng_kcapi_hash_alloc(const char *name) +{ + struct lrng_hash_info *lrng_hash; + struct crypto_shash *tfm; + int ret; + + if (!name) { + pr_err("Hash name missing\n"); + return ERR_PTR(-EINVAL); + } + + tfm = crypto_alloc_shash(name, 0, 0); + if (IS_ERR(tfm)) { + pr_err("could not allocate hash %s\n", name); + return ERR_CAST(tfm); + } + + ret = sizeof(struct lrng_hash_info); + lrng_hash = kmalloc(ret, GFP_KERNEL); + if (!lrng_hash) { + crypto_free_shash(tfm); + return ERR_PTR(-ENOMEM); + } + + lrng_hash->tfm = tfm; + + pr_info("Hash %s allocated\n", name); + + return lrng_hash; +} + +static void *lrng_kcapi_hash_name_alloc(void) +{ + return lrng_kcapi_hash_alloc(lrng_kcapi_hash_name()); +} + +static u32 lrng_kcapi_hash_digestsize(void *hash) +{ + struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash; + struct crypto_shash *tfm = lrng_hash->tfm; + + return crypto_shash_digestsize(tfm); +} + +static void lrng_kcapi_hash_dealloc(void *hash) +{ + struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash; + + _lrng_kcapi_hash_free(lrng_hash); + pr_info("Hash deallocated\n"); +} + +static int lrng_kcapi_hash_init(struct shash_desc *shash, void *hash) +{ + struct lrng_hash_info *lrng_hash = (struct lrng_hash_info *)hash; + struct crypto_shash *tfm = lrng_hash->tfm; + + shash->tfm = tfm; + return crypto_shash_init(shash); +} + +static int lrng_kcapi_hash_update(struct shash_desc *shash, const u8 *inbuf, + u32 inbuflen) +{ + return crypto_shash_update(shash, inbuf, inbuflen); +} + +static int lrng_kcapi_hash_final(struct shash_desc *shash, u8 *digest) +{ + return crypto_shash_final(shash, digest); +} + +static void lrng_kcapi_hash_zero(struct shash_desc *shash) +{ + shash_desc_zero(shash); +} + +static const struct lrng_hash_cb lrng_kcapi_hash_cb = { + .hash_name = lrng_kcapi_hash_name, + .hash_alloc = lrng_kcapi_hash_name_alloc, + .hash_dealloc = lrng_kcapi_hash_dealloc, + .hash_digestsize = lrng_kcapi_hash_digestsize, + .hash_init = lrng_kcapi_hash_init, + .hash_update = lrng_kcapi_hash_update, + .hash_final = lrng_kcapi_hash_final, + .hash_desc_zero = lrng_kcapi_hash_zero, +}; + +static int __init lrng_kcapi_init(void) +{ + return lrng_set_hash_cb(&lrng_kcapi_hash_cb); +} + +static void __exit lrng_kcapi_exit(void) +{ + lrng_set_hash_cb(NULL); +} + +late_initcall(lrng_kcapi_init); +module_exit(lrng_kcapi_exit); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Entropy Source and DRNG Manager - Kernel crypto API hash backend"); diff --git a/drivers/char/lrng/lrng_health.c b/drivers/char/lrng/lrng_health.c new file mode 100644 index 0000000000000..a60c8378eea5d --- /dev/null +++ b/drivers/char/lrng/lrng_health.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Entropy Source and DRNG Manager (LRNG) Health Testing + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_es_mgr.h" +#include "lrng_health.h" + +/* Stuck Test */ +struct lrng_stuck_test { + u32 last_time; /* Stuck test: time of previous IRQ 
*/ + u32 last_delta; /* Stuck test: delta of previous IRQ */ + u32 last_delta2; /* Stuck test: 2. time derivation of prev IRQ */ +}; + +/* Repetition Count Test */ +struct lrng_rct { + atomic_t rct_count; /* Number of stuck values */ +}; + +/* Adaptive Proportion Test */ +struct lrng_apt { + /* Data window size */ +#define LRNG_APT_WINDOW_SIZE 512 + /* LSB of time stamp to process */ +#define LRNG_APT_LSB 16 +#define LRNG_APT_WORD_MASK (LRNG_APT_LSB - 1) + atomic_t apt_count; /* APT counter */ + atomic_t apt_base; /* APT base reference */ + + atomic_t apt_trigger; + bool apt_base_set; /* Is APT base set? */ +}; + +/* Health data collected for one entropy source */ +struct lrng_health_es_state { + struct lrng_rct rct; + struct lrng_apt apt; + + /* SP800-90B startup health tests */ +#define LRNG_SP80090B_STARTUP_SAMPLES 1024 +#define LRNG_SP80090B_STARTUP_BLOCKS ((LRNG_SP80090B_STARTUP_SAMPLES + \ + LRNG_APT_WINDOW_SIZE - 1) / \ + LRNG_APT_WINDOW_SIZE) + bool sp80090b_startup_done; + atomic_t sp80090b_startup_blocks; +}; + +#define LRNG_HEALTH_ES_INIT(x) \ + x.rct.rct_count = ATOMIC_INIT(0), \ + x.apt.apt_count = ATOMIC_INIT(0), \ + x.apt.apt_base = ATOMIC_INIT(-1), \ + x.apt.apt_trigger = ATOMIC_INIT(LRNG_APT_WINDOW_SIZE), \ + x.apt.apt_base_set = false, \ + x.sp80090b_startup_blocks = ATOMIC_INIT(LRNG_SP80090B_STARTUP_BLOCKS), \ + x.sp80090b_startup_done = false, + +/* The health test code must operate lock-less */ +struct lrng_health { + bool health_test_enabled; + struct lrng_health_es_state es_state[lrng_int_es_last]; +}; + +static struct lrng_health lrng_health = { + .health_test_enabled = true, + +#ifdef CONFIG_LRNG_IRQ + LRNG_HEALTH_ES_INIT(.es_state[lrng_int_es_irq]) +#endif +#ifdef CONFIG_LRNG_SCHED + LRNG_HEALTH_ES_INIT(.es_state[lrng_int_es_sched]) +#endif +}; + +static DEFINE_PER_CPU(struct lrng_stuck_test[lrng_int_es_last], + lrng_stuck_test_array); + +static bool lrng_sp80090b_health_requested(void) +{ + /* Health tests are only requested in FIPS mode */ + return fips_enabled; +} + +static bool lrng_sp80090b_health_enabled(void) +{ + struct lrng_health *health = &lrng_health; + + return lrng_sp80090b_health_requested() && health->health_test_enabled; +} + +/*************************************************************************** + * SP800-90B Compliance + * + * If the Linux-RNG is booted into FIPS mode, the following interfaces + * provide an SP800-90B compliant noise source: + * + * * /dev/random + * * getrandom(2) + * * get_random_bytes_full + * + * All other interfaces, including /dev/urandom or get_random_bytes without + * the add_random_ready_callback cannot claim to use an SP800-90B compliant + * noise source. + ***************************************************************************/ + +/* + * Perform SP800-90B startup testing + */ +static void lrng_sp80090b_startup(struct lrng_health *health, + enum lrng_internal_es es) +{ + struct lrng_health_es_state *es_state = &health->es_state[es]; + + if (!es_state->sp80090b_startup_done && + atomic_dec_and_test(&es_state->sp80090b_startup_blocks)) { + es_state->sp80090b_startup_done = true; + pr_info("SP800-90B startup health tests for internal entropy source %u completed\n", + es); + lrng_drng_force_reseed(); + + /* + * We cannot call lrng_es_add_entropy() as this may cause a + * schedule operation while in scheduler context for the + * scheduler ES. 
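+ * The lrng_drng_force_reseed() call above is the deferred alternative:
+ * it is expected to merely mark the DRNGs for a reseed on their next
+ * request, which is safe in this (potentially atomic) context.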
+ */ + } +} + +/* + * Handle failure of SP800-90B startup testing + */ +static void lrng_sp80090b_startup_failure(struct lrng_health *health, + enum lrng_internal_es es) +{ + struct lrng_health_es_state *es_state = &health->es_state[es]; + + /* Reset of LRNG and its entropy - NOTE: we are in atomic context */ + lrng_reset(); + + /* + * Reset the SP800-90B startup test. + * + * NOTE SP800-90B section 4.3 bullet 4 does not specify what + * exactly is to be done in case of failure! Thus, we do what + * makes sense, i.e. restarting the health test and thus gating + * the output function of /dev/random and getrandom(2). + */ + atomic_set(&es_state->sp80090b_startup_blocks, + LRNG_SP80090B_STARTUP_BLOCKS); +} + +/* + * Handle failure of SP800-90B runtime testing + */ +static void lrng_sp80090b_runtime_failure(struct lrng_health *health, + enum lrng_internal_es es) +{ + struct lrng_health_es_state *es_state = &health->es_state[es]; + + lrng_sp80090b_startup_failure(health, es); + es_state->sp80090b_startup_done = false; +} + +static void lrng_sp80090b_failure(struct lrng_health *health, + enum lrng_internal_es es) +{ + struct lrng_health_es_state *es_state = &health->es_state[es]; + + if (es_state->sp80090b_startup_done) { + pr_err("SP800-90B runtime health test failure for internal entropy source %u - invalidating all existing entropy and initiate SP800-90B startup\n", es); + lrng_sp80090b_runtime_failure(health, es); + } else { + pr_err("SP800-90B startup test failure for internal entropy source %u - resetting\n", es); + lrng_sp80090b_startup_failure(health, es); + } +} + +bool lrng_sp80090b_startup_complete_es(enum lrng_internal_es es) +{ + struct lrng_health *health = &lrng_health; + struct lrng_health_es_state *es_state = &health->es_state[es]; + + if (!lrng_sp80090b_health_enabled()) + return true; + + return es_state->sp80090b_startup_done; +} + +bool lrng_sp80090b_compliant(enum lrng_internal_es es) +{ + struct lrng_health *health = &lrng_health; + struct lrng_health_es_state *es_state = &health->es_state[es]; + + return lrng_sp80090b_health_enabled() && + es_state->sp80090b_startup_done; +} + +/*************************************************************************** + * Adaptive Proportion Test + * + * This test complies with SP800-90B section 4.4.2. + ***************************************************************************/ + +/* + * Reset the APT counter + * + * @health [in] Reference to health state + */ +static void lrng_apt_reset(struct lrng_apt *apt, unsigned int time_masked) +{ + /* Reset APT */ + atomic_set(&apt->apt_count, 0); + atomic_set(&apt->apt_base, time_masked); +} + +static void lrng_apt_restart(struct lrng_apt *apt) +{ + atomic_set(&apt->apt_trigger, LRNG_APT_WINDOW_SIZE); +} + +/* + * Insert a new entropy event into APT + * + * This function does is void as it does not decide about the fate of a time + * stamp. An APT failure can only happen at the same time of a stuck test + * failure. Thus, the stuck failure will already decide how the time stamp + * is handled. 
+ * + * @health [in] Reference to health state + * @now_time [in] Time stamp to process + */ +static void lrng_apt_insert(struct lrng_health *health, + unsigned int now_time, enum lrng_internal_es es) +{ + struct lrng_health_es_state *es_state = &health->es_state[es]; + struct lrng_apt *apt = &es_state->apt; + + if (!lrng_sp80090b_health_requested()) + return; + + now_time &= LRNG_APT_WORD_MASK; + + /* Initialization of APT */ + if (!apt->apt_base_set) { + atomic_set(&apt->apt_base, now_time); + apt->apt_base_set = true; + return; + } + + if (now_time == (unsigned int)atomic_read(&apt->apt_base)) { + u32 apt_val = (u32)atomic_inc_return_relaxed(&apt->apt_count); + + if (apt_val >= CONFIG_LRNG_APT_CUTOFF) + lrng_sp80090b_failure(health, es); + } + + if (atomic_dec_and_test(&apt->apt_trigger)) { + lrng_apt_restart(apt); + lrng_apt_reset(apt, now_time); + lrng_sp80090b_startup(health, es); + } +} + +/*************************************************************************** + * Repetition Count Test + * + * The LRNG uses an enhanced version of the Repetition Count Test + * (RCT) specified in SP800-90B section 4.4.1. Instead of counting identical + * back-to-back values, the input to the RCT is the counting of the stuck + * values while filling the entropy pool. + * + * The RCT is applied with an alpha of 2^-30 compliant to FIPS 140-2 IG 9.8. + * + * During the counting operation, the LRNG always calculates the RCT + * cut-off value of C. If that value exceeds the allowed cut-off value, + * the LRNG will invalidate all entropy for the entropy pool which implies + * that no data can be extracted from the entropy pool unless new entropy + * is received. + ***************************************************************************/ + +/* + * Hot code path - Insert data for Repetition Count Test + * + * @health: Reference to health information + * @stuck: Decision of stuck test + */ +static void lrng_rct(struct lrng_health *health, enum lrng_internal_es es, + int stuck) +{ + struct lrng_health_es_state *es_state = &health->es_state[es]; + struct lrng_rct *rct = &es_state->rct; + + if (!lrng_sp80090b_health_requested()) + return; + + if (stuck) { + u32 rct_count = atomic_add_return_relaxed(1, &rct->rct_count); + + /* + * The cutoff value is based on the following consideration: + * alpha = 2^-30 as recommended in FIPS 140-2 IG 9.8. + * In addition, we imply an entropy value H of 1 bit as this + * is the minimum entropy required to provide full entropy. + * + * Note, rct_count (which equals to value B in the + * pseudo code of SP800-90B section 4.4.1) starts with zero. + * Hence we need to subtract one from the cutoff value as + * calculated following SP800-90B. + */ + if (rct_count >= CONFIG_LRNG_RCT_CUTOFF) { + atomic_set(&rct->rct_count, 0); + + /* + * APT must start anew as we consider all previously + * recorded data to contain no entropy. + */ + lrng_apt_restart(&es_state->apt); + + lrng_sp80090b_failure(health, es); + } + } else { + atomic_set(&rct->rct_count, 0); + } +} + +/*************************************************************************** + * Stuck Test + * + * Checking the: + * 1st derivative of the event occurrence (time delta) + * 2nd derivative of the event occurrence (delta of time deltas) + * 3rd derivative of the event occurrence (delta of delta of time deltas) + * + * All values must always be non-zero. The stuck test is only valid disabled if + * high-resolution time stamps are identified after initialization. 
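+ *
+ * Example: once two consecutive inter-event deltas are identical, the
+ * second derivative is 0 and the time stamp is reported as stuck; the
+ * same applies to a zero delta or an unchanged second derivative.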
+ ***************************************************************************/ + +static u32 lrng_delta(u32 prev, u32 next) +{ + /* + * Note that this (unsigned) subtraction does yield the correct value + * in the wraparound-case, i.e. when next < prev. + */ + return (next - prev); +} + +/* + * Hot code path + * + * @health: Reference to health information + * @now: Event time + * @return: 0 event occurrence not stuck (good time stamp) + * != 0 event occurrence stuck (reject time stamp) + */ +static int lrng_irq_stuck(enum lrng_internal_es es, u32 now_time) +{ + struct lrng_stuck_test *stuck = this_cpu_ptr(lrng_stuck_test_array); + u32 delta = lrng_delta(stuck[es].last_time, now_time); + u32 delta2 = lrng_delta(stuck[es].last_delta, delta); + u32 delta3 = lrng_delta(stuck[es].last_delta2, delta2); + + stuck[es].last_time = now_time; + stuck[es].last_delta = delta; + stuck[es].last_delta2 = delta2; + + if (!delta || !delta2 || !delta3) + return 1; + + return 0; +} + +/*************************************************************************** + * Health test interfaces + ***************************************************************************/ + +/* + * Disable all health tests + */ +void lrng_health_disable(void) +{ + struct lrng_health *health = &lrng_health; + + health->health_test_enabled = false; + + if (lrng_sp80090b_health_requested()) + pr_warn("SP800-90B compliance requested but the Linux RNG is NOT SP800-90B compliant\n"); +} + +/* + * Hot code path - Perform health test on time stamp received from an event + * + * @now_time Time stamp + */ +enum lrng_health_res lrng_health_test(u32 now_time, enum lrng_internal_es es) +{ + struct lrng_health *health = &lrng_health; + int stuck; + + if (!health->health_test_enabled) + return lrng_health_pass; + + lrng_apt_insert(health, now_time, es); + + stuck = lrng_irq_stuck(es, now_time); + lrng_rct(health, es, stuck); + if (stuck) { + /* SP800-90B disallows using a failing health test time stamp */ + return lrng_sp80090b_health_requested() ? 
+ lrng_health_fail_drop : lrng_health_fail_use; + } + + return lrng_health_pass; +} diff --git a/drivers/char/lrng/lrng_health.h b/drivers/char/lrng/lrng_health.h new file mode 100644 index 0000000000000..4f9f5033fc307 --- /dev/null +++ b/drivers/char/lrng/lrng_health.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_HEALTH_H +#define _LRNG_HEALTH_H + +#include "lrng_es_mgr.h" + +enum lrng_health_res { + lrng_health_pass, /* Health test passes on time stamp */ + lrng_health_fail_use, /* Time stamp unhealthy, but mix in */ + lrng_health_fail_drop /* Time stamp unhealthy, drop it */ +}; + +#ifdef CONFIG_LRNG_HEALTH_TESTS +bool lrng_sp80090b_startup_complete_es(enum lrng_internal_es es); +bool lrng_sp80090b_compliant(enum lrng_internal_es es); + +enum lrng_health_res lrng_health_test(u32 now_time, enum lrng_internal_es es); +void lrng_health_disable(void); +#else /* CONFIG_LRNG_HEALTH_TESTS */ +static inline bool lrng_sp80090b_startup_complete_es(enum lrng_internal_es es) +{ + return true; +} + +static inline bool lrng_sp80090b_compliant(enum lrng_internal_es es) +{ + return false; +} + +static inline enum lrng_health_res +lrng_health_test(u32 now_time, enum lrng_internal_es es) +{ + return lrng_health_pass; +} +static inline void lrng_health_disable(void) { } +#endif /* CONFIG_LRNG_HEALTH_TESTS */ + +#endif /* _LRNG_HEALTH_H */ diff --git a/drivers/char/lrng/lrng_interface_aux.c b/drivers/char/lrng/lrng_interface_aux.c new file mode 100644 index 0000000000000..0eb49bd6b48f6 --- /dev/null +++ b/drivers/char/lrng/lrng_interface_aux.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG auxiliary interfaces + * + * Copyright (C) 2022 Stephan Mueller + * Copyright (C) 2017 Jason A. Donenfeld . All + * Rights Reserved. + * Copyright (C) 2016 Jason Cooper + */ + +#include +#include +#include + +#include "lrng_es_mgr.h" +#include "lrng_interface_random_kernel.h" + +struct batched_entropy { + union { + u64 entropy_u64[LRNG_DRNG_BLOCKSIZE / sizeof(u64)]; + u32 entropy_u32[LRNG_DRNG_BLOCKSIZE / sizeof(u32)]; + }; + unsigned int position; + spinlock_t batch_lock; +}; + +/* + * Get a random word for internal kernel use only. The quality of the random + * number is as good as /dev/urandom, but there is no backtrack protection, + * with the goal of being quite fast and not depleting entropy. 
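+ *
+ * The DRNG is only invoked when a per-CPU batch runs empty: assuming
+ * the usual 64-byte LRNG_DRNG_BLOCKSIZE, one lrng_get_random_bytes()
+ * call refills eight u64 (or sixteen u32) values, so most callers are
+ * served straight from the cached batch under the per-CPU spinlock.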
+ */ +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u64.lock), +}; + +u64 get_random_u64(void) +{ + u64 ret; + unsigned long flags; + struct batched_entropy *batch; + + lrng_debug_report_seedlevel("get_random_u64"); + + batch = raw_cpu_ptr(&batched_entropy_u64); + spin_lock_irqsave(&batch->batch_lock, flags); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + lrng_get_random_bytes(batch->entropy_u64, LRNG_DRNG_BLOCKSIZE); + batch->position = 0; + } + ret = batch->entropy_u64[batch->position++]; + spin_unlock_irqrestore(&batch->batch_lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u64); + +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { + .batch_lock = __SPIN_LOCK_UNLOCKED(batched_entropy_u32.lock), +}; + +u32 get_random_u32(void) +{ + u32 ret; + unsigned long flags; + struct batched_entropy *batch; + + lrng_debug_report_seedlevel("get_random_u32"); + + batch = raw_cpu_ptr(&batched_entropy_u32); + spin_lock_irqsave(&batch->batch_lock, flags); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + lrng_get_random_bytes(batch->entropy_u32, LRNG_DRNG_BLOCKSIZE); + batch->position = 0; + } + ret = batch->entropy_u32[batch->position++]; + spin_unlock_irqrestore(&batch->batch_lock, flags); + return ret; +} +EXPORT_SYMBOL(get_random_u32); + +#ifdef CONFIG_SMP +/* + * This function is called when the CPU is coming up, with entry + * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. + */ +int random_prepare_cpu(unsigned int cpu) +{ + /* + * When the cpu comes back online, immediately invalidate all batches, + * so that we serve fresh randomness. + */ + per_cpu_ptr(&batched_entropy_u32, cpu)->position = 0; + per_cpu_ptr(&batched_entropy_u64, cpu)->position = 0; + return 0; +} + +int random_online_cpu(unsigned int cpu) +{ + return 0; +} +#endif + +/* + * It's important to invalidate all potential batched entropy that might + * be stored before the crng is initialized, which we can do lazily by + * simply resetting the counter to zero so that it's re-extracted on the + * next usage. 
+ */ +void invalidate_batched_entropy(void) +{ + int cpu; + unsigned long flags; + + for_each_possible_cpu(cpu) { + struct batched_entropy *batched_entropy; + + batched_entropy = per_cpu_ptr(&batched_entropy_u32, cpu); + spin_lock_irqsave(&batched_entropy->batch_lock, flags); + batched_entropy->position = 0; + spin_unlock(&batched_entropy->batch_lock); + + batched_entropy = per_cpu_ptr(&batched_entropy_u64, cpu); + spin_lock(&batched_entropy->batch_lock); + batched_entropy->position = 0; + spin_unlock_irqrestore(&batched_entropy->batch_lock, flags); + } +} diff --git a/drivers/char/lrng/lrng_interface_dev.c b/drivers/char/lrng/lrng_interface_dev.c new file mode 100644 index 0000000000000..e60060d402b30 --- /dev/null +++ b/drivers/char/lrng/lrng_interface_dev.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG user space device file interface + * + * Copyright (C) 2022, Stephan Mueller + */ + +#include +#include + +#include "lrng_interface_dev_common.h" + +static const struct file_operations lrng_fops = { + .read = lrng_drng_read_block, + .write = lrng_drng_write, + .poll = lrng_random_poll, + .unlocked_ioctl = lrng_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = lrng_fasync, + .llseek = noop_llseek, +}; + +static struct miscdevice lrng_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "lrng", + .nodename = "lrng", + .fops = &lrng_fops, + .mode = 0666 +}; + +static int __init lrng_dev_if_mod_init(void) +{ + return misc_register(&lrng_miscdev); +} +device_initcall(lrng_dev_if_mod_init); diff --git a/drivers/char/lrng/lrng_interface_dev_common.c b/drivers/char/lrng/lrng_interface_dev_common.c new file mode 100644 index 0000000000000..8e80d5bd9773d --- /dev/null +++ b/drivers/char/lrng/lrng_interface_dev_common.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG User and kernel space interfaces + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_drng_mgr.h" +#include "lrng_es_aux.h" +#include "lrng_es_mgr.h" +#include "lrng_interface_dev_common.h" + +DECLARE_WAIT_QUEUE_HEAD(lrng_write_wait); +static struct fasync_struct *fasync; + +static bool lrng_seed_hw = true; /* Allow HW to provide seed */ +static bool lrng_seed_user = true; /* Allow user space to provide seed */ + +/********************************** Helper ***********************************/ + +static u32 lrng_get_aux_ent(void) +{ + return lrng_es[lrng_ext_es_aux]->curr_entropy(0); +} + +/* Is the DRNG seed level too low? */ +bool lrng_need_entropy(void) +{ + return (lrng_get_aux_ent() < lrng_write_wakeup_bits); +} + +void lrng_writer_wakeup(void) +{ + if (lrng_need_entropy() && wq_has_sleeper(&lrng_write_wait)) { + wake_up_interruptible(&lrng_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } +} + +void lrng_init_wakeup_dev(void) +{ + kill_fasync(&fasync, SIGIO, POLL_IN); +} + +/* External entropy provider is allowed to provide seed data */ +bool lrng_state_exseed_allow(enum lrng_external_noise_source source) +{ + if (source == lrng_noise_source_hw) + return lrng_seed_hw; + return lrng_seed_user; +} + +/* Enable / disable external entropy provider to furnish seed */ +void lrng_state_exseed_set(enum lrng_external_noise_source source, bool type) +{ + /* + * If the LRNG is not yet operational, allow all entropy sources + * to deliver data unconditionally to get fully seeded asap. 
+ */ + if (!lrng_state_operational()) + return; + + if (source == lrng_noise_source_hw) + lrng_seed_hw = type; + else + lrng_seed_user = type; +} + +void lrng_state_exseed_allow_all(void) +{ + lrng_state_exseed_set(lrng_noise_source_hw, true); + lrng_state_exseed_set(lrng_noise_source_user, true); +} + +/************************ LRNG user output interfaces *************************/ + +ssize_t lrng_read_common(char __user *buf, size_t nbytes) +{ + ssize_t ret = 0; + u8 tmpbuf[LRNG_DRNG_BLOCKSIZE] __aligned(LRNG_KCAPI_ALIGN); + u8 *tmp_large = NULL, *tmp = tmpbuf; + u32 tmplen = sizeof(tmpbuf); + + if (nbytes == 0) + return 0; + + /* + * Satisfy large read requests -- as the common case are smaller + * request sizes, such as 16 or 32 bytes, avoid a kmalloc overhead for + * those by using the stack variable of tmpbuf. + */ + if (!CONFIG_BASE_SMALL && (nbytes > sizeof(tmpbuf))) { + tmplen = min_t(u32, nbytes, LRNG_DRNG_MAX_REQSIZE); + tmp_large = kmalloc(tmplen + LRNG_KCAPI_ALIGN, GFP_KERNEL); + if (!tmp_large) + tmplen = sizeof(tmpbuf); + else + tmp = PTR_ALIGN(tmp_large, LRNG_KCAPI_ALIGN); + } + + while (nbytes) { + u32 todo = min_t(u32, nbytes, tmplen); + int rc = 0; + + /* Reschedule if we received a large request. */ + if ((tmp_large) && need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + rc = lrng_drng_get_sleep(tmp, todo); + if (rc <= 0) { + if (rc < 0) + ret = rc; + break; + } + if (copy_to_user(buf, tmp, rc)) { + ret = -EFAULT; + break; + } + + nbytes -= rc; + buf += rc; + ret += rc; + } + + /* Wipe data just returned from memory */ + if (tmp_large) + kfree_sensitive(tmp_large); + else + memzero_explicit(tmpbuf, sizeof(tmpbuf)); + + return ret; +} + +ssize_t lrng_read_common_block(int nonblock, char __user *buf, size_t nbytes) +{ + int ret; + + if (nbytes == 0) + return 0; + + ret = lrng_drng_sleep_while_nonoperational(nonblock); + if (ret) + return ret; + + return lrng_read_common(buf, nbytes); +} + +ssize_t lrng_drng_read_block(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos) +{ + return lrng_read_common_block(file->f_flags & O_NONBLOCK, buf, nbytes); +} + +__poll_t lrng_random_poll(struct file *file, poll_table *wait) +{ + __poll_t mask; + + poll_wait(file, &lrng_init_wait, wait); + poll_wait(file, &lrng_write_wait, wait); + mask = 0; + if (lrng_state_operational()) + mask |= EPOLLIN | EPOLLRDNORM; + if (lrng_need_entropy() || + lrng_state_exseed_allow(lrng_noise_source_user)) { + lrng_state_exseed_set(lrng_noise_source_user, false); + mask |= EPOLLOUT | EPOLLWRNORM; + } + return mask; +} + +ssize_t lrng_drng_write_common(const char __user *buffer, size_t count, + u32 entropy_bits) +{ + ssize_t ret = 0; + u8 buf[64] __aligned(LRNG_KCAPI_ALIGN); + const char __user *p = buffer; + u32 orig_entropy_bits = entropy_bits; + + if (!lrng_get_available()) { + ret = lrng_drng_initalize(); + if (!ret) + return ret; + } + + count = min_t(size_t, count, INT_MAX); + while (count > 0) { + size_t bytes = min_t(size_t, count, sizeof(buf)); + u32 ent = min_t(u32, bytes<<3, entropy_bits); + + if (copy_from_user(&buf, p, bytes)) + return -EFAULT; + /* Inject data into entropy pool */ + lrng_pool_insert_aux(buf, bytes, ent); + + count -= bytes; + p += bytes; + ret += bytes; + entropy_bits -= ent; + + cond_resched(); + } + + /* Force reseed of DRNG during next data request. 
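+ *
+ * A plain write(2) to /dev/random or /dev/urandom enters this
+ * function with an entropy credit of 0; the forced reseed makes
+ * sure such data still affects the DRNG state on its next request
+ * even though no entropy is credited for it.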
*/ + if (!orig_entropy_bits) + lrng_drng_force_reseed(); + + return ret; +} + +ssize_t lrng_drng_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + return lrng_drng_write_common(buffer, count, 0); +} + +long lrng_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + u32 digestsize_bits; + int size, ent_count_bits; + int __user *p = (int __user *)arg; + + switch (cmd) { + case RNDGETENTCNT: + ent_count_bits = lrng_avail_entropy(); + if (put_user(ent_count_bits, p)) + return -EFAULT; + return 0; + case RNDADDTOENTCNT: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count_bits, p)) + return -EFAULT; + ent_count_bits = (int)lrng_get_aux_ent() + ent_count_bits; + if (ent_count_bits < 0) + ent_count_bits = 0; + digestsize_bits = lrng_get_digestsize(); + if (ent_count_bits > digestsize_bits) + ent_count_bits = digestsize_bits; + lrng_pool_set_entropy(ent_count_bits); + return 0; + case RNDADDENTROPY: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (get_user(ent_count_bits, p++)) + return -EFAULT; + if (ent_count_bits < 0) + return -EINVAL; + if (get_user(size, p++)) + return -EFAULT; + if (size < 0) + return -EINVAL; + /* there cannot be more entropy than data */ + ent_count_bits = min(ent_count_bits, size<<3); + return lrng_drng_write_common((const char __user *)p, size, + ent_count_bits); + case RNDZAPENTCNT: + case RNDCLEARPOOL: + /* Clear the entropy pool counter. */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + lrng_pool_set_entropy(0); + return 0; + case RNDRESEEDCRNG: + /* + * We leave the capability check here since it is present + * in the upstream's RNG implementation. Yet, user space + * can trigger a reseed as easy as writing into /dev/random + * or /dev/urandom where no privilege is needed. 
+ */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Force a reseed of all DRNGs */ + lrng_drng_force_reseed(); + return 0; + default: + return -EINVAL; + } +} +EXPORT_SYMBOL(lrng_ioctl); + +int lrng_fasync(int fd, struct file *filp, int on) +{ + return fasync_helper(fd, filp, on, &fasync); +} diff --git a/drivers/char/lrng/lrng_interface_dev_common.h b/drivers/char/lrng/lrng_interface_dev_common.h new file mode 100644 index 0000000000000..87cf504647d31 --- /dev/null +++ b/drivers/char/lrng/lrng_interface_dev_common.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_INTERFACE_DEV_COMMON_H +#define _LRNG_INTERFACE_DEV_COMMON_H + +#include +#include + +/******************* Upstream functions hooked into the LRNG ******************/ +enum lrng_external_noise_source { + lrng_noise_source_hw, + lrng_noise_source_user +}; + +#ifdef CONFIG_LRNG_COMMON_DEV_IF +void lrng_writer_wakeup(void); +void lrng_init_wakeup_dev(void); +void lrng_state_exseed_set(enum lrng_external_noise_source source, bool type); +void lrng_state_exseed_allow_all(void); +#else /* CONFIG_LRNG_COMMON_DEV_IF */ +static inline void lrng_writer_wakeup(void) { } +static inline void lrng_init_wakeup_dev(void) { } +static inline void +lrng_state_exseed_set(enum lrng_external_noise_source source, bool type) { } +static inline void lrng_state_exseed_allow_all(void) { } +#endif /* CONFIG_LRNG_COMMON_DEV_IF */ + +/****** Downstream service functions to actual interface implementations ******/ + +bool lrng_state_exseed_allow(enum lrng_external_noise_source source); +int lrng_fasync(int fd, struct file *filp, int on); +long lrng_ioctl(struct file *f, unsigned int cmd, unsigned long arg); +ssize_t lrng_drng_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos); +ssize_t lrng_drng_write_common(const char __user *buffer, size_t count, + u32 entropy_bits); +__poll_t lrng_random_poll(struct file *file, poll_table *wait); +ssize_t lrng_read_common_block(int nonblock, char __user *buf, size_t nbytes); +ssize_t lrng_drng_read_block(struct file *file, char __user *buf, size_t nbytes, + loff_t *ppos); +ssize_t lrng_read_common(char __user *buf, size_t nbytes); +bool lrng_need_entropy(void); + +extern struct wait_queue_head lrng_write_wait; +extern struct wait_queue_head lrng_init_wait; + +#endif /* _LRNG_INTERFACE_DEV_COMMON_H */ diff --git a/drivers/char/lrng/lrng_interface_hwrand.c b/drivers/char/lrng/lrng_interface_hwrand.c new file mode 100644 index 0000000000000..e841eea133482 --- /dev/null +++ b/drivers/char/lrng/lrng_interface_hwrand.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG interface with the HW-Random framework + * + * Copyright (C) 2022, Stephan Mueller + */ + +#include +#include +#include + +static int lrng_hwrand_if_random(struct hwrng *rng, void *buf, size_t max, + bool wait) +{ + /* + * lrng_get_random_bytes_full not called as we cannot block. + * + * Note: We should either adjust .quality below depending on + * rng_is_initialized() or block here, but neither is not supported by + * the hw_rand framework. + */ + lrng_get_random_bytes(buf, max); + return (int)max; +} + +static struct hwrng lrng_hwrand = { + .name = "lrng", + .init = NULL, + .cleanup = NULL, + .read = lrng_hwrand_if_random, + + /* + * We set .quality only in case the LRNG does not provide the common + * interfaces or does not use the legacy RNG as entropy source. 
This + * shall avoid that the LRNG automatically spawns the hw_rand + * framework's hwrng kernel thread to feed data into + * add_hwgenerator_randomness. When the LRNG implements the common + * interfaces, this function feeds the data directly into the LRNG. + * If the LRNG uses the legacy RNG as entropy source, + * add_hwgenerator_randomness is implemented by the legacy RNG, but + * still eventually feeds the data into the LRNG. We should avoid such + * circular loops. + * + * We can specify full entropy here, because the LRNG is designed + * to provide full entropy. + */ +#if !defined(CONFIG_LRNG_RANDOM_IF) && \ + !defined(CONFIG_LRNG_KERNEL_RNG) + .quality = 1024, +#endif +}; + +static int __init lrng_hwrand_if_mod_init(void) +{ + return hwrng_register(&lrng_hwrand); +} + +static void __exit lrng_hwrand_if_mod_exit(void) +{ + hwrng_unregister(&lrng_hwrand); +} + +module_init(lrng_hwrand_if_mod_init); +module_exit(lrng_hwrand_if_mod_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Entropy Source and DRNG Manager HW-Random Interface"); diff --git a/drivers/char/lrng/lrng_interface_kcapi.c b/drivers/char/lrng/lrng_interface_kcapi.c new file mode 100644 index 0000000000000..4cb511f8088e0 --- /dev/null +++ b/drivers/char/lrng/lrng_interface_kcapi.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG interface with the RNG framework of the kernel crypto API + * + * Copyright (C) 2022, Stephan Mueller + */ + +#include +#include +#include + +#include "lrng_drng_mgr.h" +#include "lrng_es_aux.h" + +static int lrng_kcapi_if_init(struct crypto_tfm *tfm) +{ + return 0; +} + +static void lrng_kcapi_if_cleanup(struct crypto_tfm *tfm) { } + +static int lrng_kcapi_if_reseed(const u8 *src, unsigned int slen) +{ + int ret; + + if (!slen) + return 0; + + /* Insert caller-provided data without crediting entropy */ + ret = lrng_pool_insert_aux((u8 *)src, slen, 0); + if (ret) + return ret; + + /* Make sure the new data is immediately available to DRNG */ + lrng_drng_force_reseed(); + + return 0; +} + +static int lrng_kcapi_if_random(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *rdata, unsigned int dlen) +{ + int ret = lrng_kcapi_if_reseed(src, slen); + + if (!ret) + lrng_get_random_bytes_full(rdata, dlen); + + return ret; +} + +static int lrng_kcapi_if_reset(struct crypto_rng *tfm, + const u8 *seed, unsigned int slen) +{ + return lrng_kcapi_if_reseed(seed, slen); +} + +static struct rng_alg lrng_alg = { + .generate = lrng_kcapi_if_random, + .seed = lrng_kcapi_if_reset, + .seedsize = 0, + .base = { + .cra_name = "stdrng", + .cra_driver_name = "lrng", + .cra_priority = 500, + .cra_ctxsize = 0, + .cra_module = THIS_MODULE, + .cra_init = lrng_kcapi_if_init, + .cra_exit = lrng_kcapi_if_cleanup, + + } +}; + +#ifdef CONFIG_LRNG_DRNG_ATOMIC +static int lrng_kcapi_if_random_atomic(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *rdata, unsigned int dlen) +{ + int ret = lrng_kcapi_if_reseed(src, slen); + + if (!ret) + lrng_get_random_bytes(rdata, dlen); + + return ret; +} + +static struct rng_alg lrng_alg_atomic = { + .generate = lrng_kcapi_if_random_atomic, + .seed = lrng_kcapi_if_reset, + .seedsize = 0, + .base = { + .cra_name = "lrng_atomic", + .cra_driver_name = "lrng_atomic", + .cra_priority = 100, + .cra_ctxsize = 0, + .cra_module = THIS_MODULE, + .cra_init = lrng_kcapi_if_init, + .cra_exit = lrng_kcapi_if_cleanup, + + } +}; +#endif /* CONFIG_LRNG_DRNG_ATOMIC */ + +static int __init 
lrng_kcapi_if_mod_init(void) +{ + return +#ifdef CONFIG_LRNG_DRNG_ATOMIC + crypto_register_rng(&lrng_alg_atomic) ?: +#endif + crypto_register_rng(&lrng_alg); +} + +static void __exit lrng_kcapi_if_mod_exit(void) +{ + crypto_unregister_rng(&lrng_alg); +#ifdef CONFIG_LRNG_DRNG_ATOMIC + crypto_unregister_rng(&lrng_alg_atomic); +#endif +} + +module_init(lrng_kcapi_if_mod_init); +module_exit(lrng_kcapi_if_mod_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller "); +MODULE_DESCRIPTION("Entropy Source and DRNG Manager kernel crypto API RNG framework interface"); +MODULE_ALIAS_CRYPTO("lrng"); +MODULE_ALIAS_CRYPTO("lrng_atomic"); +MODULE_ALIAS_CRYPTO("stdrng"); diff --git a/drivers/char/lrng/lrng_interface_random_kernel.c b/drivers/char/lrng/lrng_interface_random_kernel.c new file mode 100644 index 0000000000000..81060ba98742d --- /dev/null +++ b/drivers/char/lrng/lrng_interface_random_kernel.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Kernel space interfaces API/ABI compliant to linux/random.h + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include "lrng_es_aux.h" +#include "lrng_es_irq.h" +#include "lrng_es_mgr.h" +#include "lrng_interface_dev_common.h" +#include "lrng_interface_random_kernel.h" + +static RAW_NOTIFIER_HEAD(lrng_ready_chain); +static DEFINE_SPINLOCK(lrng_ready_chain_lock); +static unsigned int lrng_ready_chain_used = 0; + +/********************************** Helper ***********************************/ + +int __init random_init(const char *command_line) +{ + int ret = lrng_rand_initialize(); + + lrng_pool_insert_aux(command_line, strlen(command_line), 0); + return ret; +} + +bool lrng_ready_chain_has_sleeper(void) +{ + return !!lrng_ready_chain_used; +} + +/* + * lrng_process_ready_list() - Ping all kernel internal callers waiting until + * the DRNG is completely initialized to inform that the DRNG reached that + * seed level. + * + * When the SP800-90B testing is enabled, the ping only happens if the SP800-90B + * startup health tests are completed. This implies that kernel internal + * callers always have an SP800-90B compliant noise source when being + * pinged. + */ +void lrng_process_ready_list(void) +{ + unsigned long flags; + + if (!lrng_state_operational()) + return; + + spin_lock_irqsave(&lrng_ready_chain_lock, flags); + raw_notifier_call_chain(&lrng_ready_chain, 0, NULL); + spin_unlock_irqrestore(&lrng_ready_chain_lock, flags); +} + +/************************ LRNG kernel input interfaces ************************/ + +/* + * add_hwgenerator_randomness() - Interface for in-kernel drivers of true + * hardware RNGs. + * + * Those devices may produce endless random bits and will be throttled + * when our pool is full. + * + * @buffer: buffer holding the entropic data from HW noise sources to be used to + * insert into entropy pool. + * @count: length of buffer + * @entropy_bits: amount of entropy in buffer (value is in bits) + */ +void add_hwgenerator_randomness(const void *buffer, size_t count, + size_t entropy_bits) +{ + /* + * Suspend writing if we are fully loaded with entropy. + * We'll be woken up again once below lrng_write_wakeup_thresh, + * or when the calling thread is about to terminate. 
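+ * The wake-up is issued by lrng_writer_wakeup() once the entropy
+ * of the auxiliary pool drops below lrng_write_wakeup_bits.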
+ */ + wait_event_interruptible(lrng_write_wait, + lrng_need_entropy() || + lrng_state_exseed_allow(lrng_noise_source_hw) || + kthread_should_stop()); + lrng_state_exseed_set(lrng_noise_source_hw, false); + lrng_pool_insert_aux(buffer, count, entropy_bits); +} +EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); + +/* + * add_bootloader_randomness() - Handle random seed passed by bootloader. + * + * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise + * it would be regarded as device data. + * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + * + * @buf: buffer holding the entropic data from HW noise sources to be used to + * insert into entropy pool. + * @size: length of buffer + */ +void add_bootloader_randomness(const void *buf, size_t size) +{ + lrng_pool_insert_aux(buf, size, + IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER) ? + size * 8 : 0); +} +EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +/* + * Callback for HID layer -- use the HID event values to stir the entropy pool + */ +void add_input_randomness(unsigned int type, unsigned int code, + unsigned int value) +{ + static unsigned char last_value; + + /* ignore autorepeat and the like */ + if (value == last_value) + return; + + last_value = value; + + lrng_irq_array_add_u32((type << 4) ^ code ^ (code >> 4) ^ value); +} +EXPORT_SYMBOL_GPL(add_input_randomness); + +/* + * add_device_randomness() - Add device- or boot-specific data to the entropy + * pool to help initialize it. + * + * None of this adds any entropy; it is meant to avoid the problem of + * the entropy pool having similar initial state across largely + * identical devices. + * + * @buf: buffer holding the entropic data from HW noise sources to be used to + * insert into entropy pool. + * @size: length of buffer + */ +void add_device_randomness(const void *buf, size_t size) +{ + lrng_pool_insert_aux((u8 *)buf, size, 0); +} +EXPORT_SYMBOL(add_device_randomness); + +#ifdef CONFIG_BLOCK +void rand_initialize_disk(struct gendisk *disk) { } +void add_disk_randomness(struct gendisk *disk) { } +EXPORT_SYMBOL(add_disk_randomness); +#endif + +#ifndef CONFIG_LRNG_IRQ +void add_interrupt_randomness(int irq) { } +EXPORT_SYMBOL(add_interrupt_randomness); +#endif + +/* + * unregister_random_ready_notifier() - Delete a previously registered readiness + * callback function. + * + * @nb: callback definition that was registered initially + */ +int unregister_random_ready_notifier(struct notifier_block *nb) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&lrng_ready_chain_lock, flags); + ret = raw_notifier_chain_unregister(&lrng_ready_chain, nb); + spin_unlock_irqrestore(&lrng_ready_chain_lock, flags); + + if (!ret && lrng_ready_chain_used) + lrng_ready_chain_used--; + + return ret; +} +EXPORT_SYMBOL(unregister_random_ready_notifier); + +/* + * register_random_ready_notifier() - Add a callback function that will be + * invoked when the DRNG is fully initialized and seeded. 
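+ *
+ * A minimal, purely illustrative caller (hypothetical names):
+ *
+ *	static int my_ready_cb(struct notifier_block *nb,
+ *			       unsigned long action, void *data)
+ *	{
+ *		return 0;	// LRNG is now fully seeded
+ *	}
+ *	static struct notifier_block my_nb = { .notifier_call = my_ready_cb };
+ *
+ *	if (register_random_ready_notifier(&my_nb) == -EALREADY)
+ *		my_ready_cb(&my_nb, 0, NULL);	// already seeded, run directly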
+ * + * @nb: callback definition to be invoked when the LRNG is seeded + * + * Return: + * * 0 if callback is successfully added + * * -EALREADY if pool is already initialised (callback not called) + */ +int register_random_ready_notifier(struct notifier_block *nb) +{ + unsigned long flags; + int err = -EALREADY; + + if (likely(lrng_state_operational())) + return err; + + spin_lock_irqsave(&lrng_ready_chain_lock, flags); + if (!lrng_state_operational()) + err = raw_notifier_chain_register(&lrng_ready_chain, nb); + spin_unlock_irqrestore(&lrng_ready_chain_lock, flags); + + if (!err) + lrng_ready_chain_used++; + + return err; +} +EXPORT_SYMBOL(register_random_ready_notifier); + +#if IS_ENABLED(CONFIG_VMGENID) +static BLOCKING_NOTIFIER_HEAD(lrng_vmfork_chain); + +/* + * Handle a new unique VM ID, which is unique, not secret, so we + * don't credit it, but we do immediately force a reseed after so + * that it's used by the crng posthaste. + */ +void add_vmfork_randomness(const void *unique_vm_id, size_t size) +{ + add_device_randomness(unique_vm_id, size); + if (lrng_state_operational()) + lrng_drng_force_reseed(); + blocking_notifier_call_chain(&lrng_vmfork_chain, 0, NULL); +} +#if IS_MODULE(CONFIG_VMGENID) +EXPORT_SYMBOL_GPL(add_vmfork_randomness); +#endif + +int register_random_vmfork_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&lrng_vmfork_chain, nb); +} +EXPORT_SYMBOL_GPL(register_random_vmfork_notifier); + +int unregister_random_vmfork_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&lrng_vmfork_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_random_vmfork_notifier); +#endif + +/*********************** LRNG kernel output interfaces ************************/ + +/* + * get_random_bytes() - Provider of cryptographic strong random numbers for + * kernel-internal usage. + * + * This function is appropriate for all in-kernel use cases. However, + * it will always use the ChaCha20 DRNG. + * + * @buf: buffer to store the random bytes + * @nbytes: size of the buffer + */ +void get_random_bytes(void *buf, size_t nbytes) +{ + lrng_get_random_bytes(buf, nbytes); +} +EXPORT_SYMBOL(get_random_bytes); + +/* + * wait_for_random_bytes() - Wait for the LRNG to be seeded and thus + * guaranteed to supply cryptographically secure random numbers. + * + * This applies to: the /dev/urandom device, the get_random_bytes function, + * and the get_random_{u32,u64,int,long} family of functions. Using any of + * these functions without first calling this function forfeits the guarantee + * of security. + * + * Return: + * * 0 if the LRNG has been seeded. + * * -ERESTARTSYS if the function was interrupted by a signal. + */ +int wait_for_random_bytes(void) +{ + return lrng_drng_sleep_while_non_min_seeded(); +} +EXPORT_SYMBOL(wait_for_random_bytes); + +/* + * get_random_bytes_arch() - This function will use the architecture-specific + * hardware random number generator if it is available. + * + * The arch-specific hw RNG will almost certainly be faster than what we can + * do in software, but it is impossible to verify that it is implemented + * securely (as opposed, to, say, the AES encryption of a sequence number using + * a key known by the NSA). So it's useful if we need the speed, but only if + * we're willing to trust the hardware manufacturer not to have put in a back + * door. + * + * @buf: buffer allocated by caller to store the random data in + * @nbytes: length of outbuf + * + * Return: number of bytes filled in. 
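+ * This may be less than @nbytes when the architecture RNG is not
+ * available or temporarily cannot deliver data, so callers must check
+ * the returned length.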
+ */ +size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) +{ + size_t left = nbytes; + u8 *p = buf; + + while (left) { + unsigned long v; + size_t chunk = min_t(size_t, left, sizeof(unsigned long)); + + if (!arch_get_random_long(&v)) + break; + + memcpy(p, &v, chunk); + p += chunk; + left -= chunk; + } + + return nbytes - left; +} +EXPORT_SYMBOL(get_random_bytes_arch); + +/* + * Returns whether or not the LRNG has been seeded. + * + * Returns: true if the urandom pool has been seeded. + * false if the urandom pool has not been seeded. + */ +bool rng_is_initialized(void) +{ + return lrng_state_operational(); +} +EXPORT_SYMBOL(rng_is_initialized); diff --git a/drivers/char/lrng/lrng_interface_random_kernel.h b/drivers/char/lrng/lrng_interface_random_kernel.h new file mode 100644 index 0000000000000..763ec1a633ff4 --- /dev/null +++ b/drivers/char/lrng/lrng_interface_random_kernel.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_INTERFACE_RANDOM_H +#define _LRNG_INTERFACE_RANDOM_H + +#ifdef CONFIG_LRNG_RANDOM_IF +void lrng_process_ready_list(void); +bool lrng_ready_chain_has_sleeper(void); +void invalidate_batched_entropy(void); +#else /* CONFIG_LRNG_RANDOM_IF */ +static inline bool lrng_ready_chain_has_sleeper(void) { return false; } +static inline void lrng_process_ready_list(void) { } +static inline void invalidate_batched_entropy(void) { } +#endif /* CONFIG_LRNG_RANDOM_IF */ + +#endif /* _LRNG_INTERFACE_RANDOM_H */ diff --git a/drivers/char/lrng/lrng_interface_random_user.c b/drivers/char/lrng/lrng_interface_random_user.c new file mode 100644 index 0000000000000..7e45f41b7a4df --- /dev/null +++ b/drivers/char/lrng/lrng_interface_random_user.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG Common user space interfaces compliant to random(4), random(7) and + * getrandom(2) man pages. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_es_mgr.h" +#include "lrng_interface_dev_common.h" + +static ssize_t lrng_drng_read(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos) +{ + if (!lrng_state_min_seeded()) + pr_notice_ratelimited("%s - use of insufficiently seeded DRNG (%zu bytes read)\n", + current->comm, nbytes); + else if (!lrng_state_operational()) + pr_debug_ratelimited("%s - use of not fully seeded DRNG (%zu bytes read)\n", + current->comm, nbytes); + + return lrng_read_common(buf, nbytes); +} + +const struct file_operations random_fops = { + .read = lrng_drng_read_block, + .write = lrng_drng_write, + .poll = lrng_random_poll, + .unlocked_ioctl = lrng_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = lrng_fasync, + .llseek = noop_llseek, +}; + +const struct file_operations urandom_fops = { + .read = lrng_drng_read, + .write = lrng_drng_write, + .unlocked_ioctl = lrng_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .fasync = lrng_fasync, + .llseek = noop_llseek, +}; + +SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, + unsigned int, flags) +{ + if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE)) + return -EINVAL; + + /* + * Requesting insecure and blocking randomness at the same time makes + * no sense. 
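+ *
+ * The remaining flag handling maps GRND_INSECURE to the non-blocking
+ * /dev/urandom read path and every other request to the blocking read
+ * path, which honours GRND_NONBLOCK.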
+ */ + if ((flags & + (GRND_INSECURE|GRND_RANDOM)) == (GRND_INSECURE|GRND_RANDOM)) + return -EINVAL; + + if (count > INT_MAX) + count = INT_MAX; + + if (flags & GRND_INSECURE) + return lrng_drng_read(NULL, buf, count, NULL); + + return lrng_read_common_block(flags & GRND_NONBLOCK, buf, count); +} diff --git a/drivers/char/lrng/lrng_numa.c b/drivers/char/lrng/lrng_numa.c new file mode 100644 index 0000000000000..d74dd8df28437 --- /dev/null +++ b/drivers/char/lrng/lrng_numa.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG NUMA support + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_drng_mgr.h" +#include "lrng_es_irq.h" +#include "lrng_es_mgr.h" +#include "lrng_numa.h" +#include "lrng_proc.h" + +static struct lrng_drng **lrng_drng __read_mostly = NULL; + +struct lrng_drng **lrng_drng_instances(void) +{ + /* counterpart to cmpxchg_release in _lrng_drngs_numa_alloc */ + return READ_ONCE(lrng_drng); +} + +/* Allocate the data structures for the per-NUMA node DRNGs */ +static void _lrng_drngs_numa_alloc(struct work_struct *work) +{ + struct lrng_drng **drngs; + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + u32 node; + bool init_drng_used = false; + + mutex_lock(&lrng_crypto_cb_update); + + /* per-NUMA-node DRNGs are already present */ + if (lrng_drng) + goto unlock; + + /* Make sure the initial DRNG is initialized and its drng_cb is set */ + if (lrng_drng_initalize()) + goto err; + + drngs = kcalloc(nr_node_ids, sizeof(void *), GFP_KERNEL|__GFP_NOFAIL); + for_each_online_node(node) { + struct lrng_drng *drng; + + if (!init_drng_used) { + drngs[node] = lrng_drng_init; + init_drng_used = true; + continue; + } + + drng = kmalloc_node(sizeof(struct lrng_drng), + GFP_KERNEL|__GFP_NOFAIL, node); + memset(drng, 0, sizeof(lrng_drng)); + + if (lrng_drng_alloc_common(drng, lrng_drng_init->drng_cb)) { + kfree(drng); + goto err; + } + + drng->hash_cb = lrng_drng_init->hash_cb; + drng->hash = lrng_drng_init->hash_cb->hash_alloc(); + if (IS_ERR(drng->hash)) { + lrng_drng_init->drng_cb->drng_dealloc(drng->drng); + kfree(drng); + goto err; + } + + mutex_init(&drng->lock); + rwlock_init(&drng->hash_lock); + + /* + * No reseeding of NUMA DRNGs from previous DRNGs as this + * would complicate the code. Let it simply reseed. 
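+ * The lrng_pool_all_numa_nodes_seeded(false) call further below resets
+ * the all-nodes-seeded state, so every newly allocated per-node DRNG
+ * pulls its own seed from the entropy sources on its next request.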
+ */ + drngs[node] = drng; + + lrng_pool_inc_numa_node(); + pr_info("DRNG and entropy pool read hash for NUMA node %d allocated\n", + node); + } + + /* counterpart to READ_ONCE in lrng_drng_instances */ + if (!cmpxchg_release(&lrng_drng, NULL, drngs)) { + lrng_pool_all_numa_nodes_seeded(false); + goto unlock; + } + +err: + for_each_online_node(node) { + struct lrng_drng *drng = drngs[node]; + + if (drng == lrng_drng_init) + continue; + + if (drng) { + drng->hash_cb->hash_dealloc(drng->hash); + drng->drng_cb->drng_dealloc(drng->drng); + kfree(drng); + } + } + kfree(drngs); + +unlock: + mutex_unlock(&lrng_crypto_cb_update); +} + +static DECLARE_WORK(lrng_drngs_numa_alloc_work, _lrng_drngs_numa_alloc); + +static void lrng_drngs_numa_alloc(void) +{ + schedule_work(&lrng_drngs_numa_alloc_work); +} + +static int __init lrng_numa_init(void) +{ + lrng_drngs_numa_alloc(); + return 0; +} + +late_initcall(lrng_numa_init); diff --git a/drivers/char/lrng/lrng_numa.h b/drivers/char/lrng/lrng_numa.h new file mode 100644 index 0000000000000..dc8dff9816eeb --- /dev/null +++ b/drivers/char/lrng/lrng_numa.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_NUMA_H +#define _LRNG_NUMA_H + +#ifdef CONFIG_NUMA +struct lrng_drng **lrng_drng_instances(void); +#else /* CONFIG_NUMA */ +static inline struct lrng_drng **lrng_drng_instances(void) { return NULL; } +#endif /* CONFIG_NUMA */ + +#endif /* _LRNG_NUMA_H */ diff --git a/drivers/char/lrng/lrng_proc.c b/drivers/char/lrng/lrng_proc.c new file mode 100644 index 0000000000000..6e3e0699a8406 --- /dev/null +++ b/drivers/char/lrng/lrng_proc.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG proc interfaces + * + * Copyright (C) 2022, Stephan Mueller + */ + +#include +#include +#include +#include + +#include "lrng_drng_mgr.h" +#include "lrng_es_aux.h" +#include "lrng_es_mgr.h" +#include "lrng_proc.h" + +/* Number of online DRNGs */ +static u32 numa_drngs = 1; + +void lrng_pool_inc_numa_node(void) +{ + numa_drngs++; +} + +static int lrng_proc_type_show(struct seq_file *m, void *v) +{ + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + unsigned char buf[250]; + u32 i; + + mutex_lock(&lrng_drng_init->lock); + snprintf(buf, sizeof(buf), + "DRNG name: %s\n" + "LRNG security strength in bits: %d\n" + "Number of DRNG instances: %u\n" + "Standards compliance: %s\n" + "LRNG minimally seeded: %s\n" + "LRNG fully seeded: %s\n", + lrng_drng_init->drng_cb->drng_name(), + lrng_security_strength(), + numa_drngs, + lrng_sp80090c_compliant() ? "SP800-90C " : "", + lrng_state_min_seeded() ? "true" : "false", + lrng_state_fully_seeded() ? 
"true" : "false"); + seq_write(m, buf, strlen(buf)); + + for_each_lrng_es(i) { + snprintf(buf, sizeof(buf), + "Entropy Source %u properties:\n" + " Name: %s\n", + i, lrng_es[i]->name); + seq_write(m, buf, strlen(buf)); + + buf[0] = '\0'; + lrng_es[i]->state(buf, sizeof(buf)); + seq_write(m, buf, strlen(buf)); + } + + mutex_unlock(&lrng_drng_init->lock); + + return 0; +} + +static int __init lrng_proc_type_init(void) +{ + proc_create_single("lrng_type", 0444, NULL, &lrng_proc_type_show); + return 0; +} + +module_init(lrng_proc_type_init); diff --git a/drivers/char/lrng/lrng_proc.h b/drivers/char/lrng/lrng_proc.h new file mode 100644 index 0000000000000..c653274f19547 --- /dev/null +++ b/drivers/char/lrng/lrng_proc.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_PROC_H +#define _LRNG_PROC_H + +#ifdef CONFIG_SYSCTL +void lrng_pool_inc_numa_node(void); +#else +static inline void lrng_pool_inc_numa_node(void) { } +#endif + +#endif /* _LRNG_PROC_H */ diff --git a/drivers/char/lrng/lrng_selftest.c b/drivers/char/lrng/lrng_selftest.c new file mode 100644 index 0000000000000..15f1e4a2a719a --- /dev/null +++ b/drivers/char/lrng/lrng_selftest.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG power-on and on-demand self-test + * + * Copyright (C) 2022, Stephan Mueller + */ + +/* + * In addition to the self-tests below, the following LRNG components + * are covered with self-tests during regular operation: + * + * * power-on self-test: SP800-90A DRBG provided by the Linux kernel crypto API + * * power-on self-test: PRNG provided by the Linux kernel crypto API + * * runtime test: Raw noise source data testing including SP800-90B compliant + * tests when enabling CONFIG_LRNG_HEALTH_TESTS + * + * Additional developer tests present with LRNG code: + * * SP800-90B APT and RCT test enforcement validation when enabling + * CONFIG_LRNG_APT_BROKEN or CONFIG_LRNG_RCT_BROKEN. + * * Collection of raw entropy from the interrupt noise source when enabling + * CONFIG_LRNG_TESTING and pulling the data from the kernel with the provided + * interface. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_drng_chacha20.h" +#include "lrng_sha.h" + +#define LRNG_SELFTEST_PASSED 0 +#define LRNG_SEFLTEST_ERROR_TIME (1 << 0) +#define LRNG_SEFLTEST_ERROR_CHACHA20 (1 << 1) +#define LRNG_SEFLTEST_ERROR_HASH (1 << 2) +#define LRNG_SEFLTEST_ERROR_GCD (1 << 3) +#define LRNG_SELFTEST_NOT_EXECUTED 0xffffffff + +#ifdef CONFIG_LRNG_TIMER_COMMON + +#include "lrng_es_timer_common.h" + +static u32 lrng_data_selftest_ptr = 0; +static u32 lrng_data_selftest[LRNG_DATA_ARRAY_SIZE]; + +static void lrng_data_process_selftest_insert(u32 time) +{ + u32 ptr = lrng_data_selftest_ptr++ & LRNG_DATA_WORD_MASK; + unsigned int array = lrng_data_idx2array(ptr); + unsigned int slot = lrng_data_idx2slot(ptr); + + /* zeroization of slot to ensure the following OR adds the data */ + lrng_data_selftest[array] &= + ~(lrng_data_slot_val(0xffffffff & LRNG_DATA_SLOTSIZE_MASK, + slot)); + lrng_data_selftest[array] |= + lrng_data_slot_val(time & LRNG_DATA_SLOTSIZE_MASK, slot); +} + +static void lrng_data_process_selftest_u32(u32 data) +{ + u32 pre_ptr, ptr, mask; + unsigned int pre_array; + + /* Increment pointer by number of slots taken for input value */ + lrng_data_selftest_ptr += LRNG_DATA_SLOTS_PER_UINT; + + /* ptr to current unit */ + ptr = lrng_data_selftest_ptr; + + lrng_data_split_u32(&ptr, &pre_ptr, &mask); + + /* MSB of data go into previous unit */ + pre_array = lrng_data_idx2array(pre_ptr); + /* zeroization of slot to ensure the following OR adds the data */ + lrng_data_selftest[pre_array] &= ~(0xffffffff & ~mask); + lrng_data_selftest[pre_array] |= data & ~mask; + + /* LSB of data go into current unit */ + lrng_data_selftest[lrng_data_idx2array(ptr)] = data & mask; +} + +static unsigned int lrng_data_process_selftest(void) +{ + u32 time; + u32 idx_zero_compare = (0 << 0) | (1 << 8) | (2 << 16) | (3 << 24); + u32 idx_one_compare = (4 << 0) | (5 << 8) | (6 << 16) | (7 << 24); + u32 idx_last_compare = + (((LRNG_DATA_NUM_VALUES - 4) & LRNG_DATA_SLOTSIZE_MASK) << 0) | + (((LRNG_DATA_NUM_VALUES - 3) & LRNG_DATA_SLOTSIZE_MASK) << 8) | + (((LRNG_DATA_NUM_VALUES - 2) & LRNG_DATA_SLOTSIZE_MASK) << 16) | + (((LRNG_DATA_NUM_VALUES - 1) & LRNG_DATA_SLOTSIZE_MASK) << 24); + + (void)idx_one_compare; + + /* "poison" the array to verify the operation of the zeroization */ + lrng_data_selftest[0] = 0xffffffff; + lrng_data_selftest[1] = 0xffffffff; + + lrng_data_process_selftest_insert(0); + /* + * Note, when using lrng_data_process_u32() on unaligned ptr, + * the first slots will go into next word, and the last slots go + * into the previous word. 
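+ *
+ * Worked example for the call below: after the single insert above, the
+ * write pointer sits at slot 1 of word 0. Processing the value
+ * (4 << 0) | (1 << 8) | (2 << 16) | (3 << 24) therefore stores the upper
+ * slot values 1, 2 and 3 in slots 1..3 of word 0 (completing
+ * idx_zero_compare), while the least significant slot value 4 spills over
+ * into slot 0 of word 1; the subsequent inserts of 5, 6 and 7 fill the
+ * remaining slots of word 1 to match idx_one_compare.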
+ */ + lrng_data_process_selftest_u32((4 << 0) | (1 << 8) | (2 << 16) | + (3 << 24)); + lrng_data_process_selftest_insert(5); + lrng_data_process_selftest_insert(6); + lrng_data_process_selftest_insert(7); + + if ((lrng_data_selftest[0] != idx_zero_compare) || + (lrng_data_selftest[1] != idx_one_compare)) + goto err; + + /* Reset for next test */ + lrng_data_selftest[0] = 0; + lrng_data_selftest[1] = 0; + lrng_data_selftest_ptr = 0; + + for (time = 0; time < LRNG_DATA_NUM_VALUES; time++) + lrng_data_process_selftest_insert(time); + + if ((lrng_data_selftest[0] != idx_zero_compare) || + (lrng_data_selftest[1] != idx_one_compare) || + (lrng_data_selftest[LRNG_DATA_ARRAY_SIZE - 1] != idx_last_compare)) + goto err; + + return LRNG_SELFTEST_PASSED; + +err: + pr_err("LRNG data array self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_TIME; +} + +static unsigned int lrng_gcd_selftest(void) +{ + u32 history[10]; + unsigned int i; + +#define LRNG_GCD_SELFTEST 3 + for (i = 0; i < ARRAY_SIZE(history); i++) + history[i] = i * LRNG_GCD_SELFTEST; + + if (lrng_gcd_analyze(history, ARRAY_SIZE(history)) == LRNG_GCD_SELFTEST) + return LRNG_SELFTEST_PASSED; + + pr_err("LRNG GCD self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_GCD; +} + +#else /* CONFIG_LRNG_TIMER_COMMON */ + +static unsigned int lrng_data_process_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +static unsigned int lrng_gcd_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +#endif /* CONFIG_LRNG_TIMER_COMMON */ + +/* The test vectors are taken from crypto/testmgr.h */ +static unsigned int lrng_hash_selftest(void) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_hash_cb *hash_cb = &lrng_sha_hash_cb; + static const u8 lrng_hash_selftest_result[] = +#ifdef CONFIG_CRYPTO_LIB_SHA256 + { 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, + 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, + 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad }; +#else /* CONFIG_CRYPTO_LIB_SHA256 */ + { 0xa9, 0x99, 0x3e, 0x36, 0x47, 0x06, 0x81, 0x6a, 0xba, 0x3e, + 0x25, 0x71, 0x78, 0x50, 0xc2, 0x6c, 0x9c, 0xd0, 0xd8, 0x9d }; +#endif /* CONFIG_CRYPTO_LIB_SHA256 */ + static const u8 hash_input[] = { 0x61, 0x62, 0x63 }; /* "abc" */ + u8 digest[sizeof(lrng_hash_selftest_result)] __aligned(sizeof(u32)); + + if (sizeof(digest) != hash_cb->hash_digestsize(NULL)) + return LRNG_SEFLTEST_ERROR_HASH; + + if (!hash_cb->hash_init(shash, NULL) && + !hash_cb->hash_update(shash, hash_input, + sizeof(hash_input)) && + !hash_cb->hash_final(shash, digest) && + !memcmp(digest, lrng_hash_selftest_result, sizeof(digest))) + return 0; + + pr_err("LRNG %s Hash self-test FAILED\n", hash_cb->hash_name()); + return LRNG_SEFLTEST_ERROR_HASH; +} + +#ifdef CONFIG_LRNG_DRNG_CHACHA20 + +static void lrng_selftest_bswap32(u32 *ptr, u32 words) +{ + u32 i; + + /* Byte-swap data which is an LE representation */ + for (i = 0; i < words; i++) { + __le32 *p = (__le32 *)ptr; + + *p = cpu_to_le32(*ptr); + ptr++; + } +} + +/* + * The test vectors were generated using the ChaCha20 DRNG from + * https://www.chronox.de/chacha20.html + */ +static unsigned int lrng_chacha20_drng_selftest(void) +{ + const struct lrng_drng_cb *drng_cb = &lrng_cc20_drng_cb; + u8 seed[CHACHA_KEY_SIZE * 2] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 
0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + }; + struct chacha20_block chacha20; + int ret; + u8 outbuf[CHACHA_KEY_SIZE * 2] __aligned(sizeof(u32)); + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * and pulling one half ChaCha20 DRNG block. + */ + static const u8 expected_halfblock[CHACHA_KEY_SIZE] = { + 0x76, 0xb8, 0xe0, 0xad, 0xa0, 0xf1, 0x3d, 0x90, + 0x40, 0x5d, 0x6a, 0xe5, 0x53, 0x86, 0xbd, 0x28, + 0xbd, 0xd2, 0x19, 0xb8, 0xa0, 0x8d, 0xed, 0x1a, + 0xa8, 0x36, 0xef, 0xcc, 0x8b, 0x77, 0x0d, 0xc7 }; + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * followed by a reseed with two keyblocks + * 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + * 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + * 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + * 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + * 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + * 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + * 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + * 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f + * and pulling one ChaCha20 DRNG block. + */ + static const u8 expected_oneblock[CHACHA_KEY_SIZE * 2] = { + 0xe3, 0xb0, 0x8a, 0xcc, 0x34, 0xc3, 0x17, 0x0e, + 0xc3, 0xd8, 0xc3, 0x40, 0xe7, 0x73, 0xe9, 0x0d, + 0xd1, 0x62, 0xa3, 0x5d, 0x7d, 0xf2, 0xf1, 0x4a, + 0x24, 0x42, 0xb7, 0x1e, 0xb0, 0x05, 0x17, 0x07, + 0xb9, 0x35, 0x10, 0x69, 0x8b, 0x46, 0xfb, 0x51, + 0xe9, 0x91, 0x3f, 0x46, 0xf2, 0x4d, 0xea, 0xd0, + 0x81, 0xc1, 0x1b, 0xa9, 0x5d, 0x52, 0x91, 0x5f, + 0xcd, 0xdc, 0xc6, 0xd6, 0xc3, 0x7c, 0x50, 0x23 }; + + /* + * Expected result when ChaCha20 DRNG state is zero: + * * constants are set to "expand 32-byte k" + * * remaining state is 0 + * followed by a reseed with one key block plus one byte + * 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + * 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + * 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + * 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + * 0x20 + * and pulling less than one ChaCha20 DRNG block. 
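+ *
+ * The generate request below asks for CHACHA_KEY_SIZE + 4 bytes, i.e. an
+ * amount that is not a multiple of the ChaCha20 block size, so the
+ * non-block-aligned output path of the DRNG is covered as well.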
+ */ + static const u8 expected_block_nonalinged[CHACHA_KEY_SIZE + 4] = { + 0x9c, 0xfc, 0x5e, 0x31, 0x21, 0x62, 0x11, 0x85, + 0xd3, 0x77, 0xd3, 0x69, 0x0f, 0xa8, 0x16, 0x55, + 0xb4, 0x4c, 0xf6, 0x52, 0xf3, 0xa8, 0x37, 0x99, + 0x38, 0x76, 0xa0, 0x66, 0xec, 0xbb, 0xce, 0xa9, + 0x9c, 0x95, 0xa1, 0xfd }; + + BUILD_BUG_ON(sizeof(seed) % sizeof(u32)); + + memset(&chacha20, 0, sizeof(chacha20)); + lrng_cc20_init_rfc7539(&chacha20); + lrng_selftest_bswap32((u32 *)seed, sizeof(seed) / sizeof(u32)); + + /* Generate with zero state */ + ret = drng_cb->drng_generate(&chacha20, outbuf, + sizeof(expected_halfblock)); + if (ret != sizeof(expected_halfblock)) + goto err; + if (memcmp(outbuf, expected_halfblock, sizeof(expected_halfblock))) + goto err; + + /* Clear state of DRNG */ + memset(&chacha20.key.u[0], 0, 48); + + /* Reseed with 2 key blocks */ + ret = drng_cb->drng_seed(&chacha20, seed, sizeof(expected_oneblock)); + if (ret < 0) + goto err; + ret = drng_cb->drng_generate(&chacha20, outbuf, + sizeof(expected_oneblock)); + if (ret != sizeof(expected_oneblock)) + goto err; + if (memcmp(outbuf, expected_oneblock, sizeof(expected_oneblock))) + goto err; + + /* Clear state of DRNG */ + memset(&chacha20.key.u[0], 0, 48); + + /* Reseed with 1 key block and one byte */ + ret = drng_cb->drng_seed(&chacha20, seed, + sizeof(expected_block_nonalinged)); + if (ret < 0) + goto err; + ret = drng_cb->drng_generate(&chacha20, outbuf, + sizeof(expected_block_nonalinged)); + if (ret != sizeof(expected_block_nonalinged)) + goto err; + if (memcmp(outbuf, expected_block_nonalinged, + sizeof(expected_block_nonalinged))) + goto err; + + return LRNG_SELFTEST_PASSED; + +err: + pr_err("LRNG ChaCha20 DRNG self-test FAILED\n"); + return LRNG_SEFLTEST_ERROR_CHACHA20; +} + +#else /* CONFIG_LRNG_DRNG_CHACHA20 */ + +static unsigned int lrng_chacha20_drng_selftest(void) +{ + return LRNG_SELFTEST_PASSED; +} + +#endif /* CONFIG_LRNG_DRNG_CHACHA20 */ + +static unsigned int lrng_selftest_status = LRNG_SELFTEST_NOT_EXECUTED; + +static int lrng_selftest(void) +{ + unsigned int ret = lrng_data_process_selftest(); + + ret |= lrng_chacha20_drng_selftest(); + ret |= lrng_hash_selftest(); + ret |= lrng_gcd_selftest(); + + if (ret) { + if (IS_ENABLED(CONFIG_LRNG_SELFTEST_PANIC)) + panic("LRNG self-tests failed: %u\n", ret); + } else { + pr_info("LRNG self-tests passed\n"); + } + + lrng_selftest_status = ret; + + if (lrng_selftest_status) + return -EFAULT; + return 0; +} + +#ifdef CONFIG_SYSFS +/* Re-perform self-test when any value is written to the sysfs file. */ +static int lrng_selftest_sysfs_set(const char *val, + const struct kernel_param *kp) +{ + return lrng_selftest(); +} + +static const struct kernel_param_ops lrng_selftest_sysfs = { + .set = lrng_selftest_sysfs_set, + .get = param_get_uint, +}; +module_param_cb(selftest_status, &lrng_selftest_sysfs, &lrng_selftest_status, + 0644); +#endif /* CONFIG_SYSFS */ + +static int __init lrng_selftest_init(void) +{ + return lrng_selftest(); +} + +module_init(lrng_selftest_init); diff --git a/drivers/char/lrng/lrng_sha.h b/drivers/char/lrng/lrng_sha.h new file mode 100644 index 0000000000000..d2f134f547737 --- /dev/null +++ b/drivers/char/lrng/lrng_sha.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * LRNG SHA definition usable in atomic contexts right from the start of the + * kernel. 
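+ *
+ * Depending on the kernel configuration, lrng_sha_hash_cb is provided either
+ * by the SHA-256 backend (lrng_sha256.c, when CONFIG_CRYPTO_LIB_SHA256 is
+ * available) or by the always-present SHA-1 fallback backend (lrng_sha1.c).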
+ * + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_SHA_H +#define _LRNG_SHA_H + +extern const struct lrng_hash_cb lrng_sha_hash_cb; + +#endif /* _LRNG_SHA_H */ diff --git a/drivers/char/lrng/lrng_sha1.c b/drivers/char/lrng/lrng_sha1.c new file mode 100644 index 0000000000000..9cbc7a6fee49a --- /dev/null +++ b/drivers/char/lrng/lrng_sha1.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the SHA-1 implementation that can be used + * without the kernel crypto API available including during early boot and in + * atomic contexts. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "lrng_sha.h" + +/* + * If the SHA-256 support is not compiled, we fall back to SHA-1 that is always + * compiled and present in the kernel. + */ +static u32 lrng_sha1_hash_digestsize(void *hash) +{ + return SHA1_DIGEST_SIZE; +} + +static void lrng_sha1_block_fn(struct sha1_state *sctx, const u8 *src, + int blocks) +{ + u32 temp[SHA1_WORKSPACE_WORDS]; + + while (blocks--) { + sha1_transform(sctx->state, src, temp); + src += SHA1_BLOCK_SIZE; + } + memzero_explicit(temp, sizeof(temp)); +} + +static int lrng_sha1_hash_init(struct shash_desc *shash, void *hash) +{ + /* + * We do not need a TFM - we only need sufficient space for + * struct sha1_state on the stack. + */ + sha1_base_init(shash); + return 0; +} + +static int lrng_sha1_hash_update(struct shash_desc *shash, + const u8 *inbuf, u32 inbuflen) +{ + return sha1_base_do_update(shash, inbuf, inbuflen, lrng_sha1_block_fn); +} + +static int lrng_sha1_hash_final(struct shash_desc *shash, u8 *digest) +{ + return sha1_base_do_finalize(shash, lrng_sha1_block_fn) ?: + sha1_base_finish(shash, digest); +} + +static const char *lrng_sha1_hash_name(void) +{ + return "SHA-1"; +} + +static void lrng_sha1_hash_desc_zero(struct shash_desc *shash) +{ + memzero_explicit(shash_desc_ctx(shash), sizeof(struct sha1_state)); +} + +static void *lrng_sha1_hash_alloc(void) +{ + pr_info("Hash %s allocated\n", lrng_sha1_hash_name()); + return NULL; +} + +static void lrng_sha1_hash_dealloc(void *hash) { } + +const struct lrng_hash_cb lrng_sha_hash_cb = { + .hash_name = lrng_sha1_hash_name, + .hash_alloc = lrng_sha1_hash_alloc, + .hash_dealloc = lrng_sha1_hash_dealloc, + .hash_digestsize = lrng_sha1_hash_digestsize, + .hash_init = lrng_sha1_hash_init, + .hash_update = lrng_sha1_hash_update, + .hash_final = lrng_sha1_hash_final, + .hash_desc_zero = lrng_sha1_hash_desc_zero, +}; diff --git a/drivers/char/lrng/lrng_sha256.c b/drivers/char/lrng/lrng_sha256.c new file mode 100644 index 0000000000000..50705351a71cc --- /dev/null +++ b/drivers/char/lrng/lrng_sha256.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Backend for the LRNG providing the SHA-256 implementation that can be used + * without the kernel crypto API available including during early boot and in + * atomic contexts. + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#include "lrng_sha.h" + +static u32 lrng_sha256_hash_digestsize(void *hash) +{ + return SHA256_DIGEST_SIZE; +} + +static int lrng_sha256_hash_init(struct shash_desc *shash, void *hash) +{ + /* + * We do not need a TFM - we only need sufficient space for + * struct sha256_state on the stack. 
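+ *
+ * The sha256_init()/sha256_update()/sha256_final() calls in this file are
+ * the stateless SHA-256 library helpers from <crypto/sha2.h> operating
+ * directly on that state, not the kernel crypto API.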
+ */ + sha256_init(shash_desc_ctx(shash)); + return 0; +} + +static int lrng_sha256_hash_update(struct shash_desc *shash, + const u8 *inbuf, u32 inbuflen) +{ + sha256_update(shash_desc_ctx(shash), inbuf, inbuflen); + return 0; +} + +static int lrng_sha256_hash_final(struct shash_desc *shash, u8 *digest) +{ + sha256_final(shash_desc_ctx(shash), digest); + return 0; +} + +static const char *lrng_sha256_hash_name(void) +{ + return "SHA-256"; +} + +static void lrng_sha256_hash_desc_zero(struct shash_desc *shash) +{ + memzero_explicit(shash_desc_ctx(shash), sizeof(struct sha256_state)); +} + +static void *lrng_sha256_hash_alloc(void) +{ + pr_info("Hash %s allocated\n", lrng_sha256_hash_name()); + return NULL; +} + +static void lrng_sha256_hash_dealloc(void *hash) { } + +const struct lrng_hash_cb lrng_sha_hash_cb = { + .hash_name = lrng_sha256_hash_name, + .hash_alloc = lrng_sha256_hash_alloc, + .hash_dealloc = lrng_sha256_hash_dealloc, + .hash_digestsize = lrng_sha256_hash_digestsize, + .hash_init = lrng_sha256_hash_init, + .hash_update = lrng_sha256_hash_update, + .hash_final = lrng_sha256_hash_final, + .hash_desc_zero = lrng_sha256_hash_desc_zero, +}; diff --git a/drivers/char/lrng/lrng_switch.c b/drivers/char/lrng/lrng_switch.c new file mode 100644 index 0000000000000..5ffc0286a4a45 --- /dev/null +++ b/drivers/char/lrng/lrng_switch.c @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG DRNG switching support + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include + +#include "lrng_es_aux.h" +#include "lrng_es_mgr.h" +#include "lrng_numa.h" + +static int __maybe_unused +lrng_hash_switch(struct lrng_drng *drng_store, const void *cb, int node) +{ + const struct lrng_hash_cb *new_cb = (const struct lrng_hash_cb *)cb; + const struct lrng_hash_cb *old_cb = drng_store->hash_cb; + unsigned long flags; + u32 i; + void *new_hash = new_cb->hash_alloc(); + void *old_hash = drng_store->hash; + int ret; + + if (IS_ERR(new_hash)) { + pr_warn("could not allocate new LRNG pool hash (%ld)\n", + PTR_ERR(new_hash)); + return PTR_ERR(new_hash); + } + + if (new_cb->hash_digestsize(new_hash) > LRNG_MAX_DIGESTSIZE) { + pr_warn("digest size of newly requested hash too large\n"); + new_cb->hash_dealloc(new_hash); + return -EINVAL; + } + + write_lock_irqsave(&drng_store->hash_lock, flags); + + /* Trigger the switch for each entropy source */ + for_each_lrng_es(i) { + if (!lrng_es[i]->switch_hash) + continue; + ret = lrng_es[i]->switch_hash(drng_store, node, new_cb, + new_hash, old_cb); + if (ret) { + u32 j; + + /* Revert all already executed operations */ + for (j = 0; j < i; j++) { + if (!lrng_es[j]->switch_hash) + continue; + WARN_ON(lrng_es[j]->switch_hash(drng_store, + node, old_cb, + old_hash, + new_cb)); + } + goto err; + } + } + + drng_store->hash = new_hash; + drng_store->hash_cb = new_cb; + old_cb->hash_dealloc(old_hash); + pr_info("Conditioning function allocated for DRNG for NUMA node %d\n", + node); + +err: + write_unlock_irqrestore(&drng_store->hash_lock, flags); + return ret; +} + +static int __maybe_unused +lrng_drng_switch(struct lrng_drng *drng_store, const void *cb, int node) +{ + const struct lrng_drng_cb *new_cb = (const struct lrng_drng_cb *)cb; + const struct lrng_drng_cb *old_cb = drng_store->drng_cb; + int ret; + u8 seed[LRNG_DRNG_SECURITY_STRENGTH_BYTES]; + void *new_drng = new_cb->drng_alloc(LRNG_DRNG_SECURITY_STRENGTH_BYTES); + void *old_drng = drng_store->drng; + u32 current_security_strength; + bool 
reset_drng = !lrng_get_available(); + + if (IS_ERR(new_drng)) { + pr_warn("could not allocate new DRNG for NUMA node %d (%ld)\n", + node, PTR_ERR(new_drng)); + return PTR_ERR(new_drng); + } + + current_security_strength = lrng_security_strength(); + mutex_lock(&drng_store->lock); + + /* + * Pull from existing DRNG to seed new DRNG regardless of seed status + * of old DRNG -- the entropy state for the DRNG is left unchanged which + * implies that als the new DRNG is reseeded when deemed necessary. This + * seeding of the new DRNG shall only ensure that the new DRNG has the + * same entropy as the old DRNG. + */ + ret = old_cb->drng_generate(old_drng, seed, sizeof(seed)); + mutex_unlock(&drng_store->lock); + + if (ret < 0) { + reset_drng = true; + pr_warn("getting random data from DRNG failed for NUMA node %d (%d)\n", + node, ret); + } else { + /* seed new DRNG with data */ + ret = new_cb->drng_seed(new_drng, seed, ret); + memzero_explicit(seed, sizeof(seed)); + if (ret < 0) { + reset_drng = true; + pr_warn("seeding of new DRNG failed for NUMA node %d (%d)\n", + node, ret); + } else { + pr_debug("seeded new DRNG of NUMA node %d instance from old DRNG instance\n", + node); + } + } + + mutex_lock(&drng_store->lock); + + if (reset_drng) + lrng_drng_reset(drng_store); + + drng_store->drng = new_drng; + drng_store->drng_cb = new_cb; + + /* Reseed if previous LRNG security strength was insufficient */ + if (current_security_strength < lrng_security_strength()) + drng_store->force_reseed = true; + + /* Force oversampling seeding as we initialize DRNG */ + if (IS_ENABLED(CONFIG_CRYPTO_FIPS)) + lrng_unset_fully_seeded(drng_store); + + if (lrng_state_min_seeded()) + lrng_set_entropy_thresh(lrng_get_seed_entropy_osr( + drng_store->fully_seeded)); + + old_cb->drng_dealloc(old_drng); + + pr_info("DRNG of NUMA node %d switched\n", node); + + mutex_unlock(&drng_store->lock); + return ret; +} + +/* + * Switch the existing DRNG and hash instances with new using the new crypto + * callbacks. The caller must hold the lrng_crypto_cb_update lock. + */ +static int lrng_switch(const void *cb, + int(*switcher)(struct lrng_drng *drng_store, + const void *cb, int node)) +{ + struct lrng_drng **lrng_drng = lrng_drng_instances(); + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + int ret = 0; + + if (lrng_drng) { + u32 node; + + for_each_online_node(node) { + if (lrng_drng[node]) + ret = switcher(lrng_drng[node], cb, node); + } + } else { + ret = switcher(lrng_drng_init, cb, 0); + } + + return 0; +} + +/* + * lrng_set_drng_cb - Register new cryptographic callback functions for DRNG + * The registering implies that all old DRNG states are replaced with new + * DRNG states. + * + * drng_cb: Callback functions to be registered -- if NULL, use the default + * callbacks defined at compile time. + * + * Return: + * * 0 on success + * * < 0 on error + */ +int lrng_set_drng_cb(const struct lrng_drng_cb *drng_cb) +{ + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + int ret; + + if (!IS_ENABLED(CONFIG_LRNG_SWITCH_DRNG)) + return -EOPNOTSUPP; + + if (!drng_cb) + drng_cb = lrng_default_drng_cb; + + mutex_lock(&lrng_crypto_cb_update); + + /* + * If a callback other than the default is set, allow it only to be + * set back to the default callback. This ensures that multiple + * different callbacks can be registered at the same time. If a + * callback different from the current callback and the default + * callback shall be set, the current callback must be deregistered + * (e.g. 
the kernel module providing it must be unloaded) and the new + * implementation can be registered. + */ + if ((drng_cb != lrng_default_drng_cb) && + (lrng_drng_init->drng_cb != lrng_default_drng_cb)) { + pr_warn("disallow setting new DRNG callbacks, unload the old callbacks first!\n"); + ret = -EINVAL; + goto out; + } + + ret = lrng_switch(drng_cb, lrng_drng_switch); + /* The swtich may imply new entropy due to larger DRNG sec strength. */ + if (!ret) + lrng_es_add_entropy(); + +out: + mutex_unlock(&lrng_crypto_cb_update); + return ret; +} +EXPORT_SYMBOL(lrng_set_drng_cb); + +/* + * lrng_set_hash_cb - Register new cryptographic callback functions for hash + * The registering implies that all old hash states are replaced with new + * hash states. + * + * @hash_cb: Callback functions to be registered -- if NULL, use the default + * callbacks defined at compile time. + * + * Return: + * * 0 on success + * * < 0 on error + */ +int lrng_set_hash_cb(const struct lrng_hash_cb *hash_cb) +{ + struct lrng_drng *lrng_drng_init = lrng_drng_init_instance(); + int ret; + + if (!IS_ENABLED(CONFIG_LRNG_SWITCH_HASH)) + return -EOPNOTSUPP; + + if (!hash_cb) + hash_cb = lrng_default_hash_cb; + + mutex_lock(&lrng_crypto_cb_update); + + /* Comment from lrng_set_drng_cb applies. */ + if ((hash_cb != lrng_default_hash_cb) && + (lrng_drng_init->hash_cb != lrng_default_hash_cb)) { + pr_warn("disallow setting new hash callbacks, unload the old callbacks first!\n"); + ret = -EINVAL; + goto out; + } + + ret = lrng_switch(hash_cb, lrng_hash_switch); + /* The swtich may imply new entropy due to larger digest size. */ + if (!ret) + lrng_es_add_entropy(); + +out: + mutex_unlock(&lrng_crypto_cb_update); + return ret; +} +EXPORT_SYMBOL(lrng_set_hash_cb); diff --git a/drivers/char/lrng/lrng_sysctl.c b/drivers/char/lrng/lrng_sysctl.c new file mode 100644 index 0000000000000..0f657d01bf62c --- /dev/null +++ b/drivers/char/lrng/lrng_sysctl.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG sysctl interfaces + * + * Copyright (C) 2022, Stephan Mueller + */ + +#include +#include +#include +#include + +#include "lrng_drng_mgr.h" +#include "lrng_es_mgr.h" +#include "lrng_sysctl.h" + +/* + * This function is used to return both the bootid UUID, and random + * UUID. The difference is in whether table->data is NULL; if it is, + * then a new UUID is generated and returned to the user. + * + * If the user accesses this via the proc interface, the UUID will be + * returned as an ASCII string in the standard UUID format; if via the + * sysctl system call, as 16 bytes of binary data. 
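+ *
+ * With the "kernel/random" sysctl table registered below, the proc paths are
+ * /proc/sys/kernel/random/uuid, which returns a freshly generated UUID on
+ * every read, and /proc/sys/kernel/random/boot_id, which returns the same
+ * UUID for the lifetime of the boot.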
+ */ +static int lrng_sysctl_do_uuid(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + unsigned char buf[64], tmp_uuid[16], *uuid; + + uuid = table->data; + if (!uuid) { + uuid = tmp_uuid; + generate_random_uuid(uuid); + } else { + static DEFINE_SPINLOCK(bootid_spinlock); + + spin_lock(&bootid_spinlock); + if (!uuid[8]) + generate_random_uuid(uuid); + spin_unlock(&bootid_spinlock); + } + + sprintf(buf, "%pU", uuid); + + fake_table.data = buf; + fake_table.maxlen = sizeof(buf); + + return proc_dostring(&fake_table, write, buffer, lenp, ppos); +} + +static int lrng_sysctl_do_entropy(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + int entropy_count; + + entropy_count = lrng_avail_entropy(); + + fake_table.data = &entropy_count; + fake_table.maxlen = sizeof(entropy_count); + + return proc_dointvec(&fake_table, write, buffer, lenp, ppos); +} + +static int lrng_sysctl_do_poolsize(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table fake_table; + u32 i, entropy_count = 0; + + for_each_lrng_es(i) + entropy_count += lrng_es[i]->max_entropy(); + + fake_table.data = &entropy_count; + fake_table.maxlen = sizeof(entropy_count); + + return proc_dointvec(&fake_table, write, buffer, lenp, ppos); +} + +static int lrng_min_write_thresh; +static int lrng_max_write_thresh = (LRNG_WRITE_WAKEUP_ENTROPY << 3); +static char lrng_sysctl_bootid[16]; +static int lrng_drng_reseed_max_min; + +void lrng_sysctl_update_max_write_thresh(u32 new_digestsize) +{ + lrng_max_write_thresh = (int)new_digestsize; + /* Ensure that changes to the global variable are visible */ + mb(); +} + +static struct ctl_table random_table[] = { + { + .procname = "poolsize", + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = lrng_sysctl_do_poolsize, + }, + { + .procname = "entropy_avail", + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = lrng_sysctl_do_entropy, + }, + { + .procname = "write_wakeup_threshold", + .data = &lrng_write_wakeup_bits, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &lrng_min_write_thresh, + .extra2 = &lrng_max_write_thresh, + }, + { + .procname = "boot_id", + .data = &lrng_sysctl_bootid, + .maxlen = 16, + .mode = 0444, + .proc_handler = lrng_sysctl_do_uuid, + }, + { + .procname = "uuid", + .maxlen = 16, + .mode = 0444, + .proc_handler = lrng_sysctl_do_uuid, + }, + { + .procname = "urandom_min_reseed_secs", + .data = &lrng_drng_reseed_max_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &lrng_drng_reseed_max_min, + }, + { } +}; + +static int __init random_sysctls_init(void) +{ + register_sysctl_init("kernel/random", random_table); + return 0; +} +device_initcall(random_sysctls_init); diff --git a/drivers/char/lrng/lrng_sysctl.h b/drivers/char/lrng/lrng_sysctl.h new file mode 100644 index 0000000000000..4b487e5077ed9 --- /dev/null +++ b/drivers/char/lrng/lrng_sysctl.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_SYSCTL_H +#define _LRNG_SYSCTL_H + +#ifdef CONFIG_LRNG_SYSCTL +void lrng_sysctl_update_max_write_thresh(u32 new_digestsize); +#else +static inline void lrng_sysctl_update_max_write_thresh(u32 new_digestsize) { } +#endif + +#endif /* _LRNG_SYSCTL_H */ diff --git a/drivers/char/lrng/lrng_testing.c b/drivers/char/lrng/lrng_testing.c new file 
mode 100644 index 0000000000000..101140085d818 --- /dev/null +++ b/drivers/char/lrng/lrng_testing.c @@ -0,0 +1,901 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * LRNG testing interfaces to obtain raw entropy + * + * Copyright (C) 2022, Stephan Mueller + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lrng_definitions.h" +#include "lrng_drng_chacha20.h" +#include "lrng_sha.h" +#include "lrng_testing.h" + +#if defined(CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY) || \ + defined(CONFIG_LRNG_RAW_SCHED_PID_ENTROPY) || \ + defined(CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY) || \ + defined(CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY) || \ + defined(CONFIG_LRNG_SCHED_PERF) +#define LRNG_TESTING_USE_BUSYLOOP +#endif + +#ifdef CONFIG_LRNG_TESTING_RECORDING + +#define LRNG_TESTING_RINGBUFFER_SIZE 1024 +#define LRNG_TESTING_RINGBUFFER_MASK (LRNG_TESTING_RINGBUFFER_SIZE - 1) + +struct lrng_testing { + u32 lrng_testing_rb[LRNG_TESTING_RINGBUFFER_SIZE]; + u32 rb_reader; + atomic_t rb_writer; + atomic_t lrng_testing_enabled; + spinlock_t lock; + wait_queue_head_t read_wait; +}; + +/*************************** Generic Data Handling ****************************/ + +/* + * boot variable: + * 0 ==> No boot test, gathering of runtime data allowed + * 1 ==> Boot test enabled and ready for collecting data, gathering runtime + * data is disabled + * 2 ==> Boot test completed and disabled, gathering of runtime data is + * disabled + */ + +static void lrng_testing_reset(struct lrng_testing *data) +{ + unsigned long flags; + + spin_lock_irqsave(&data->lock, flags); + data->rb_reader = 0; + atomic_set(&data->rb_writer, 0); + spin_unlock_irqrestore(&data->lock, flags); +} + +static void lrng_testing_init(struct lrng_testing *data, u32 boot) +{ + /* + * The boot time testing implies we have a running test. If the + * caller wants to clear it, he has to unset the boot_test flag + * at runtime via sysfs to enable regular runtime testing + */ + if (boot) + return; + + lrng_testing_reset(data); + atomic_set(&data->lrng_testing_enabled, 1); + pr_warn("Enabling data collection\n"); +} + +static void lrng_testing_fini(struct lrng_testing *data, u32 boot) +{ + /* If we have boot data, we do not reset yet to allow data to be read */ + if (boot) + return; + + atomic_set(&data->lrng_testing_enabled, 0); + lrng_testing_reset(data); + pr_warn("Disabling data collection\n"); +} + +static bool lrng_testing_store(struct lrng_testing *data, u32 value, + u32 *boot) +{ + unsigned long flags; + + if (!atomic_read(&data->lrng_testing_enabled) && (*boot != 1)) + return false; + + spin_lock_irqsave(&data->lock, flags); + + /* + * Disable entropy testing for boot time testing after ring buffer + * is filled. 
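+ * The one-time boot test therefore captures roughly one ring buffer worth
+ * of samples, i.e. LRNG_TESTING_RINGBUFFER_SIZE (1024) u32 values or 4 KiB
+ * of raw data, before disabling itself.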
+ */ + if (*boot) { + if (((u32)atomic_read(&data->rb_writer)) > + LRNG_TESTING_RINGBUFFER_SIZE) { + *boot = 2; + pr_warn_once("One time data collection test disabled\n"); + spin_unlock_irqrestore(&data->lock, flags); + return false; + } + + if (atomic_read(&data->rb_writer) == 1) + pr_warn("One time data collection test enabled\n"); + } + + data->lrng_testing_rb[((u32)atomic_read(&data->rb_writer)) & + LRNG_TESTING_RINGBUFFER_MASK] = value; + atomic_inc(&data->rb_writer); + + spin_unlock_irqrestore(&data->lock, flags); + +#ifndef LRNG_TESTING_USE_BUSYLOOP + if (wq_has_sleeper(&data->read_wait)) + wake_up_interruptible(&data->read_wait); +#endif + + return true; +} + +static bool lrng_testing_have_data(struct lrng_testing *data) +{ + return ((((u32)atomic_read(&data->rb_writer)) & + LRNG_TESTING_RINGBUFFER_MASK) != + (data->rb_reader & LRNG_TESTING_RINGBUFFER_MASK)); +} + +static int lrng_testing_reader(struct lrng_testing *data, u32 *boot, + u8 *outbuf, u32 outbuflen) +{ + unsigned long flags; + int collected_data = 0; + + lrng_testing_init(data, *boot); + + while (outbuflen) { + u32 writer = (u32)atomic_read(&data->rb_writer); + + spin_lock_irqsave(&data->lock, flags); + + /* We have no data or reached the writer. */ + if (!writer || (writer == data->rb_reader)) { + + spin_unlock_irqrestore(&data->lock, flags); + + /* + * Now we gathered all boot data, enable regular data + * collection. + */ + if (*boot) { + *boot = 0; + goto out; + } + +#ifdef LRNG_TESTING_USE_BUSYLOOP + while (!lrng_testing_have_data(data)) + ; +#else + wait_event_interruptible(data->read_wait, + lrng_testing_have_data(data)); +#endif + if (signal_pending(current)) { + collected_data = -ERESTARTSYS; + goto out; + } + + continue; + } + + /* We copy out word-wise */ + if (outbuflen < sizeof(u32)) { + spin_unlock_irqrestore(&data->lock, flags); + goto out; + } + + memcpy(outbuf, &data->lrng_testing_rb[data->rb_reader], + sizeof(u32)); + data->rb_reader++; + + spin_unlock_irqrestore(&data->lock, flags); + + outbuf += sizeof(u32); + outbuflen -= sizeof(u32); + collected_data += sizeof(u32); + } + +out: + lrng_testing_fini(data, *boot); + return collected_data; +} + +static int lrng_testing_extract_user(struct file *file, char __user *buf, + size_t nbytes, loff_t *ppos, + int (*reader)(u8 *outbuf, u32 outbuflen)) +{ + u8 *tmp, *tmp_aligned; + int ret = 0, large_request = (nbytes > 256); + + if (!nbytes) + return 0; + + /* + * The intention of this interface is for collecting at least + * 1000 samples due to the SP800-90B requirements. So, we make no + * effort in avoiding allocating more memory that actually needed + * by the user. Hence, we allocate sufficient memory to always hold + * that amount of data. 
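+ * The extra sizeof(u32) bytes in the allocation below leave room for
+ * PTR_ALIGN() to round the buffer up to a u32 boundary without exceeding
+ * the allocation.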
+ */ + tmp = kmalloc(LRNG_TESTING_RINGBUFFER_SIZE + sizeof(u32), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + tmp_aligned = PTR_ALIGN(tmp, sizeof(u32)); + + while (nbytes) { + int i; + + if (large_request && need_resched()) { + if (signal_pending(current)) { + if (ret == 0) + ret = -ERESTARTSYS; + break; + } + schedule(); + } + + i = min_t(int, nbytes, LRNG_TESTING_RINGBUFFER_SIZE); + i = reader(tmp_aligned, i); + if (i <= 0) { + if (i < 0) + ret = i; + break; + } + if (copy_to_user(buf, tmp_aligned, i)) { + ret = -EFAULT; + break; + } + + nbytes -= i; + buf += i; + ret += i; + } + + kfree_sensitive(tmp); + + if (ret > 0) + *ppos += ret; + + return ret; +} + +#endif /* CONFIG_LRNG_TESTING_RECORDING */ + +/************* Raw High-Resolution IRQ Timer Entropy Data Handling ************/ + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY + +static u32 boot_raw_hires_test = 0; +module_param(boot_raw_hires_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_hires_test, "Enable gathering boot time high resolution timer entropy of the first IRQ entropy events"); + +static struct lrng_testing lrng_raw_hires = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_hires.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_hires.read_wait) +}; + +bool lrng_raw_hires_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_hires, value, &boot_raw_hires_test); +} + +static int lrng_raw_hires_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_hires, &boot_raw_hires_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_hires_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_hires_entropy_reader); +} + +static const struct file_operations lrng_raw_hires_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_hires_read, +}; + +#endif /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ + +/********************* Raw Jiffies Entropy Data Handling **********************/ + +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY + +static u32 boot_raw_jiffies_test = 0; +module_param(boot_raw_jiffies_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_jiffies_test, "Enable gathering boot time high resolution timer entropy of the first entropy events"); + +static struct lrng_testing lrng_raw_jiffies = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_jiffies.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_jiffies.read_wait) +}; + +bool lrng_raw_jiffies_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_jiffies, value, + &boot_raw_jiffies_test); +} + +static int lrng_raw_jiffies_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_jiffies, &boot_raw_jiffies_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_jiffies_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_jiffies_entropy_reader); +} + +static const struct file_operations lrng_raw_jiffies_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_jiffies_read, +}; + +#endif /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ + +/************************** Raw IRQ Data Handling ****************************/ + +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY + +static u32 boot_raw_irq_test = 0; +module_param(boot_raw_irq_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_irq_test, "Enable gathering boot time entropy of the first IRQ entropy events"); + +static struct 
lrng_testing lrng_raw_irq = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_irq.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_irq.read_wait) +}; + +bool lrng_raw_irq_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_irq, value, &boot_raw_irq_test); +} + +static int lrng_raw_irq_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_irq, &boot_raw_irq_test, outbuf, + outbuflen); +} + +static ssize_t lrng_raw_irq_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_irq_entropy_reader); +} + +static const struct file_operations lrng_raw_irq_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_irq_read, +}; + +#endif /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ + +/************************ Raw _RET_IP_ Data Handling **************************/ + +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY + +static u32 boot_raw_retip_test = 0; +module_param(boot_raw_retip_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_retip_test, "Enable gathering boot time entropy of the first return instruction pointer entropy events"); + +static struct lrng_testing lrng_raw_retip = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_retip.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_retip.read_wait) +}; + +bool lrng_raw_retip_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_retip, value, &boot_raw_retip_test); +} + +static int lrng_raw_retip_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_retip, &boot_raw_retip_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_retip_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_retip_entropy_reader); +} + +static const struct file_operations lrng_raw_retip_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_retip_read, +}; + +#endif /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ + +/********************** Raw IRQ register Data Handling ************************/ + +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY + +static u32 boot_raw_regs_test = 0; +module_param(boot_raw_regs_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_regs_test, "Enable gathering boot time entropy of the first interrupt register entropy events"); + +static struct lrng_testing lrng_raw_regs = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_regs.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_regs.read_wait) +}; + +bool lrng_raw_regs_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_regs, value, &boot_raw_regs_test); +} + +static int lrng_raw_regs_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_regs, &boot_raw_regs_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_regs_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_regs_entropy_reader); +} + +static const struct file_operations lrng_raw_regs_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_regs_read, +}; + +#endif /* CONFIG_LRNG_RAW_REGS_ENTROPY */ + +/********************** Raw Entropy Array Data Handling ***********************/ + +#ifdef CONFIG_LRNG_RAW_ARRAY + +static u32 boot_raw_array = 0; +module_param(boot_raw_array, uint, 0644); +MODULE_PARM_DESC(boot_raw_array, "Enable gathering boot time raw 
noise array data of the first entropy events"); + +static struct lrng_testing lrng_raw_array = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_array.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_array.read_wait) +}; + +bool lrng_raw_array_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_array, value, &boot_raw_array); +} + +static int lrng_raw_array_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_array, &boot_raw_array, outbuf, + outbuflen); +} + +static ssize_t lrng_raw_array_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_array_entropy_reader); +} + +static const struct file_operations lrng_raw_array_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_array_read, +}; + +#endif /* CONFIG_LRNG_RAW_ARRAY */ + +/******************** Interrupt Performance Data Handling *********************/ + +#ifdef CONFIG_LRNG_IRQ_PERF + +static u32 boot_irq_perf = 0; +module_param(boot_irq_perf, uint, 0644); +MODULE_PARM_DESC(boot_irq_perf, "Enable gathering interrupt entropy source performance data"); + +static struct lrng_testing lrng_irq_perf = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_irq_perf.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_irq_perf.read_wait) +}; + +bool lrng_perf_time(u32 start) +{ + return lrng_testing_store(&lrng_irq_perf, random_get_entropy() - start, + &boot_irq_perf); +} + +static int lrng_irq_perf_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_irq_perf, &boot_irq_perf, outbuf, + outbuflen); +} + +static ssize_t lrng_irq_perf_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_irq_perf_reader); +} + +static const struct file_operations lrng_irq_perf_fops = { + .owner = THIS_MODULE, + .read = lrng_irq_perf_read, +}; + +#endif /* CONFIG_LRNG_IRQ_PERF */ + +/****** Raw High-Resolution Scheduler-based Timer Entropy Data Handling *******/ + +#ifdef CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY + +static u32 boot_raw_sched_hires_test = 0; +module_param(boot_raw_sched_hires_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_sched_hires_test, "Enable gathering boot time high resolution timer entropy of the first Scheduler-based entropy events"); + +static struct lrng_testing lrng_raw_sched_hires = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_sched_hires.lock), + .read_wait = + __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_sched_hires.read_wait) +}; + +bool lrng_raw_sched_hires_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_sched_hires, value, + &boot_raw_sched_hires_test); +} + +static int lrng_raw_sched_hires_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_sched_hires, + &boot_raw_sched_hires_test, + outbuf, outbuflen); +} + +static ssize_t lrng_raw_sched_hires_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_sched_hires_entropy_reader); +} + +static const struct file_operations lrng_raw_sched_hires_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_sched_hires_read, +}; + +#endif /* CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY */ + +/******************** Interrupt Performance Data Handling *********************/ + +#ifdef CONFIG_LRNG_SCHED_PERF + +static 
u32 boot_sched_perf = 0; +module_param(boot_sched_perf, uint, 0644); +MODULE_PARM_DESC(boot_sched_perf, "Enable gathering scheduler-based entropy source performance data"); + +static struct lrng_testing lrng_sched_perf = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_sched_perf.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_sched_perf.read_wait) +}; + +bool lrng_sched_perf_time(u32 start) +{ + return lrng_testing_store(&lrng_sched_perf, random_get_entropy() - start, + &boot_sched_perf); +} + +static int lrng_sched_perf_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_sched_perf, &boot_sched_perf, outbuf, + outbuflen); +} + +static ssize_t lrng_sched_perf_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_sched_perf_reader); +} + +static const struct file_operations lrng_sched_perf_fops = { + .owner = THIS_MODULE, + .read = lrng_sched_perf_read, +}; + +#endif /* CONFIG_LRNG_SCHED_PERF */ + +/*************** Raw Scheduler task_struct->pid Data Handling *****************/ + +#ifdef CONFIG_LRNG_RAW_SCHED_PID_ENTROPY + +static u32 boot_raw_sched_pid_test = 0; +module_param(boot_raw_sched_pid_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_sched_pid_test, "Enable gathering boot time entropy of the first PIDs collected by the scheduler entropy source"); + +static struct lrng_testing lrng_raw_sched_pid = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_sched_pid.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_sched_pid.read_wait) +}; + +bool lrng_raw_sched_pid_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_sched_pid, value, + &boot_raw_sched_pid_test); +} + +static int lrng_raw_sched_pid_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_sched_pid, + &boot_raw_sched_pid_test, outbuf, outbuflen); +} + +static ssize_t lrng_raw_sched_pid_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_sched_pid_entropy_reader); +} + +static const struct file_operations lrng_raw_sched_pid_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_sched_pid_read, +}; + +#endif /* CONFIG_LRNG_RAW_SCHED_PID_ENTROPY */ + + +/*********** Raw Scheduler task_struct->start_time Data Handling **************/ + +#ifdef CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY + +static u32 boot_raw_sched_starttime_test = 0; +module_param(boot_raw_sched_starttime_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_sched_starttime_test, "Enable gathering boot time entropy of the first task start times collected by the scheduler entropy source"); + +static struct lrng_testing lrng_raw_sched_starttime = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_sched_starttime.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_sched_starttime.read_wait) +}; + +bool lrng_raw_sched_starttime_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_sched_starttime, value, + &boot_raw_sched_starttime_test); +} + +static int lrng_raw_sched_starttime_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_sched_starttime, + &boot_raw_sched_starttime_test, outbuf, outbuflen); +} + +static ssize_t lrng_raw_sched_starttime_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, 
to, count, ppos, + lrng_raw_sched_starttime_entropy_reader); +} + +static const struct file_operations lrng_raw_sched_starttime_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_sched_starttime_read, +}; + +#endif /* CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY */ + +/************** Raw Scheduler task_struct->nvcsw Data Handling ****************/ + +#ifdef CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY + +static u32 boot_raw_sched_nvcsw_test = 0; +module_param(boot_raw_sched_nvcsw_test, uint, 0644); +MODULE_PARM_DESC(boot_raw_sched_nvcsw_test, "Enable gathering boot time entropy of the first task context switch numbers collected by the scheduler entropy source"); + +static struct lrng_testing lrng_raw_sched_nvcsw = { + .rb_reader = 0, + .rb_writer = ATOMIC_INIT(0), + .lock = __SPIN_LOCK_UNLOCKED(lrng_raw_sched_nvcsw.lock), + .read_wait = __WAIT_QUEUE_HEAD_INITIALIZER(lrng_raw_sched_nvcsw.read_wait) +}; + +bool lrng_raw_sched_nvcsw_entropy_store(u32 value) +{ + return lrng_testing_store(&lrng_raw_sched_nvcsw, value, + &boot_raw_sched_nvcsw_test); +} + +static int lrng_raw_sched_nvcsw_entropy_reader(u8 *outbuf, u32 outbuflen) +{ + return lrng_testing_reader(&lrng_raw_sched_nvcsw, + &boot_raw_sched_nvcsw_test, outbuf, outbuflen); +} + +static ssize_t lrng_raw_sched_nvcsw_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + return lrng_testing_extract_user(file, to, count, ppos, + lrng_raw_sched_nvcsw_entropy_reader); +} + +static const struct file_operations lrng_raw_sched_nvcsw_fops = { + .owner = THIS_MODULE, + .read = lrng_raw_sched_nvcsw_read, +}; + +#endif /* CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY */ + +/*********************************** ACVT ************************************/ + +#ifdef CONFIG_LRNG_ACVT_HASH + +/* maximum amount of data to be hashed as defined by ACVP */ +#define LRNG_ACVT_MAX_SHA_MSG (65536 >> 3) + +/* + * As we use static variables to store the data, it is clear that the + * test interface is only able to handle single threaded testing. This is + * considered to be sufficient for testing. If multi-threaded use of the + * ACVT test interface would be performed, the caller would get garbage + * but the kernel operation is unaffected by this. 
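+ *
+ * Typical use, assuming debugfs is mounted at /sys/kernel/debug and this
+ * file is built with the module name lrng_testing: write the message to be
+ * hashed to /sys/kernel/debug/lrng_testing/lrng_acvt_hash, then read the
+ * same file back to obtain the digest computed with the LRNG's built-in
+ * SHA implementation (lrng_sha_hash_cb).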
+ */ +static u8 lrng_acvt_hash_data[LRNG_ACVT_MAX_SHA_MSG] + __aligned(LRNG_KCAPI_ALIGN); +static atomic_t lrng_acvt_hash_data_size = ATOMIC_INIT(0); +static u8 lrng_acvt_hash_digest[LRNG_ATOMIC_DIGEST_SIZE]; + +static ssize_t lrng_acvt_hash_write(struct file *file, const char __user *buf, + size_t nbytes, loff_t *ppos) +{ + if (nbytes > LRNG_ACVT_MAX_SHA_MSG) + return -EINVAL; + + atomic_set(&lrng_acvt_hash_data_size, (int)nbytes); + + return simple_write_to_buffer(lrng_acvt_hash_data, + LRNG_ACVT_MAX_SHA_MSG, ppos, buf, nbytes); +} + +static ssize_t lrng_acvt_hash_read(struct file *file, char __user *to, + size_t count, loff_t *ppos) +{ + SHASH_DESC_ON_STACK(shash, NULL); + const struct lrng_hash_cb *hash_cb = &lrng_sha_hash_cb; + ssize_t ret; + + if (count > LRNG_ATOMIC_DIGEST_SIZE) + return -EINVAL; + + ret = hash_cb->hash_init(shash, NULL) ?: + hash_cb->hash_update(shash, lrng_acvt_hash_data, + atomic_read_u32(&lrng_acvt_hash_data_size)) ?: + hash_cb->hash_final(shash, lrng_acvt_hash_digest); + if (ret) + return ret; + + return simple_read_from_buffer(to, count, ppos, lrng_acvt_hash_digest, + sizeof(lrng_acvt_hash_digest)); +} + +static const struct file_operations lrng_acvt_hash_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .llseek = default_llseek, + .read = lrng_acvt_hash_read, + .write = lrng_acvt_hash_write, +}; + +#endif /* CONFIG_LRNG_ACVT_DRNG */ + +/************************************************************************** + * Debugfs interface + **************************************************************************/ + +static int __init lrng_raw_init(void) +{ + struct dentry *lrng_raw_debugfs_root; + + lrng_raw_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY + debugfs_create_file_unsafe("lrng_raw_hires", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_hires_fops); +#endif +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY + debugfs_create_file_unsafe("lrng_raw_jiffies", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_jiffies_fops); +#endif +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY + debugfs_create_file_unsafe("lrng_raw_irq", 0400, lrng_raw_debugfs_root, + NULL, &lrng_raw_irq_fops); +#endif +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY + debugfs_create_file_unsafe("lrng_raw_retip", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_retip_fops); +#endif +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY + debugfs_create_file_unsafe("lrng_raw_regs", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_regs_fops); +#endif +#ifdef CONFIG_LRNG_RAW_ARRAY + debugfs_create_file_unsafe("lrng_raw_array", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_array_fops); +#endif +#ifdef CONFIG_LRNG_IRQ_PERF + debugfs_create_file_unsafe("lrng_irq_perf", 0400, lrng_raw_debugfs_root, + NULL, &lrng_irq_perf_fops); +#endif +#ifdef CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY + debugfs_create_file_unsafe("lrng_raw_sched_hires", 0400, + lrng_raw_debugfs_root, + NULL, &lrng_raw_sched_hires_fops); +#endif +#ifdef CONFIG_LRNG_RAW_SCHED_PID_ENTROPY + debugfs_create_file_unsafe("lrng_raw_sched_pid", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_sched_pid_fops); +#endif +#ifdef CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY + debugfs_create_file_unsafe("lrng_raw_sched_starttime", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_sched_starttime_fops); +#endif +#ifdef CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY + debugfs_create_file_unsafe("lrng_raw_sched_nvcsw", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_raw_sched_nvcsw_fops); +#endif +#ifdef CONFIG_LRNG_SCHED_PERF + 
debugfs_create_file_unsafe("lrng_sched_perf", 0400, + lrng_raw_debugfs_root, NULL, + &lrng_sched_perf_fops); +#endif +#ifdef CONFIG_LRNG_ACVT_HASH + debugfs_create_file_unsafe("lrng_acvt_hash", 0600, + lrng_raw_debugfs_root, NULL, + &lrng_acvt_hash_fops); +#endif + + return 0; +} + +module_init(lrng_raw_init); diff --git a/drivers/char/lrng/lrng_testing.h b/drivers/char/lrng/lrng_testing.h new file mode 100644 index 0000000000000..672a7fca45956 --- /dev/null +++ b/drivers/char/lrng/lrng_testing.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2022, Stephan Mueller + */ + +#ifndef _LRNG_TESTING_H +#define _LRNG_TESTING_H + +#ifdef CONFIG_LRNG_RAW_HIRES_ENTROPY +bool lrng_raw_hires_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ +static inline bool lrng_raw_hires_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_HIRES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_JIFFIES_ENTROPY +bool lrng_raw_jiffies_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ +static inline bool lrng_raw_jiffies_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_JIFFIES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_IRQ_ENTROPY +bool lrng_raw_irq_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ +static inline bool lrng_raw_irq_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_IRQ_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_RETIP_ENTROPY +bool lrng_raw_retip_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ +static inline bool lrng_raw_retip_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_RETIP_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_REGS_ENTROPY +bool lrng_raw_regs_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_REGS_ENTROPY */ +static inline bool lrng_raw_regs_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_REGS_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_ARRAY +bool lrng_raw_array_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_ARRAY */ +static inline bool lrng_raw_array_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_ARRAY */ + +#ifdef CONFIG_LRNG_IRQ_PERF +bool lrng_perf_time(u32 start); +#else /* CONFIG_LRNG_IRQ_PERF */ +static inline bool lrng_perf_time(u32 start) { return false; } +#endif /*CONFIG_LRNG_IRQ_PERF */ + +#ifdef CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY +bool lrng_raw_sched_hires_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY */ +static inline bool +lrng_raw_sched_hires_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_SCHED_HIRES_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_SCHED_PID_ENTROPY +bool lrng_raw_sched_pid_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_SCHED_PID_ENTROPY */ +static inline bool +lrng_raw_sched_pid_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_SCHED_PID_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY +bool lrng_raw_sched_starttime_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY */ +static inline bool +lrng_raw_sched_starttime_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_SCHED_START_TIME_ENTROPY */ + +#ifdef CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY +bool lrng_raw_sched_nvcsw_entropy_store(u32 value); +#else /* CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY */ +static inline bool +lrng_raw_sched_nvcsw_entropy_store(u32 value) { return false; } +#endif /* CONFIG_LRNG_RAW_SCHED_NVCSW_ENTROPY */ + +#ifdef CONFIG_LRNG_SCHED_PERF +bool 
lrng_sched_perf_time(u32 start); +#else /* CONFIG_LRNG_SCHED_PERF */ +static inline bool lrng_sched_perf_time(u32 start) { return false; } +#endif /*CONFIG_LRNG_SCHED_PERF */ + +#endif /* _LRNG_TESTING_H */ diff --git a/drivers/char/random.c b/drivers/char/random.c index 4c9adb4f3d5d7..1f3072ee6b7cd 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -15,14 +15,12 @@ * - Sysctl interface. * * The high level overview is that there is one input pool, into which - * various pieces of data are hashed. Some of that data is then "credited" as - * having a certain number of bits of entropy. When enough bits of entropy are - * available, the hash is finalized and handed as a key to a stream cipher that - * expands it indefinitely for various consumers. This key is periodically - * refreshed as the various entropy collectors, described below, add data to the - * input pool and credit it. There is currently no Fortuna-like scheduler - * involved, which can lead to malicious entropy sources causing a premature - * reseed, and the entropy estimates are, at best, conservative guesses. + * various pieces of data are hashed. Prior to initialization, some of that + * data is then "credited" as having a certain number of bits of entropy. + * When enough bits of entropy are available, the hash is finalized and + * handed as a key to a stream cipher that expands it indefinitely for + * various consumers. This key is periodically refreshed as the various + * entropy collectors, described below, add data to the input pool. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -53,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -71,27 +70,26 @@ *********************************************************************/ /* - * crng_init = 0 --> Uninitialized - * 1 --> Initialized - * 2 --> Initialized from input_pool - * * crng_init is protected by base_crng->lock, and only increases - * its value (from 0->1->2). + * its value (from empty->early->ready). */ -static int crng_init = 0; -#define crng_ready() (likely(crng_init > 1)) -/* Various types of waiters for crng_init->2 transition. */ +static enum { + CRNG_EMPTY = 0, /* Little to no entropy collected */ + CRNG_EARLY = 1, /* At least POOL_EARLY_BITS collected */ + CRNG_READY = 2 /* Fully initialized with POOL_READY_BITS collected */ +} crng_init __read_mostly = CRNG_EMPTY; +#define crng_ready() (likely(crng_init >= CRNG_READY)) +/* Various types of waiters for crng_init->CRNG_READY transition. */ static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_chain_lock); static RAW_NOTIFIER_HEAD(random_ready_chain); /* Control how we warn userspace. 
*/ -static struct ratelimit_state unseeded_warning = - RATELIMIT_STATE_INIT("warn_unseeded_randomness", HZ, 3); static struct ratelimit_state urandom_warning = RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3); -static int ratelimit_disable __read_mostly; +static int ratelimit_disable __read_mostly = + IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM); module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); @@ -144,7 +142,7 @@ EXPORT_SYMBOL(wait_for_random_bytes); * returns: 0 if callback is successfully added * -EALREADY if pool is already initialised (callback not called) */ -int register_random_ready_notifier(struct notifier_block *nb) +int __cold register_random_ready_notifier(struct notifier_block *nb) { unsigned long flags; int ret = -EALREADY; @@ -162,7 +160,7 @@ int register_random_ready_notifier(struct notifier_block *nb) /* * Delete a previously registered readiness callback function. */ -int unregister_random_ready_notifier(struct notifier_block *nb) +int __cold unregister_random_ready_notifier(struct notifier_block *nb) { unsigned long flags; int ret; @@ -173,7 +171,7 @@ int unregister_random_ready_notifier(struct notifier_block *nb) return ret; } -static void process_random_ready_list(void) +static void __cold process_random_ready_list(void) { unsigned long flags; @@ -182,28 +180,10 @@ static void process_random_ready_list(void) spin_unlock_irqrestore(&random_ready_chain_lock, flags); } -#define warn_unseeded_randomness(previous) \ - _warn_unseeded_randomness(__func__, (void *)_RET_IP_, (previous)) - -static void _warn_unseeded_randomness(const char *func_name, void *caller, void **previous) -{ -#ifdef CONFIG_WARN_ALL_UNSEEDED_RANDOM - const bool print_once = false; -#else - static bool print_once __read_mostly; -#endif - - if (print_once || crng_ready() || - (previous && (caller == READ_ONCE(*previous)))) - return; - WRITE_ONCE(*previous, caller); -#ifndef CONFIG_WARN_ALL_UNSEEDED_RANDOM - print_once = true; -#endif - if (__ratelimit(&unseeded_warning)) - printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", - func_name, caller, crng_init); -} +#define warn_unseeded_randomness() \ + if (IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM) && !crng_ready()) \ + printk_deferred(KERN_NOTICE "random: %s called from %pS with crng_init=%d\n", \ + __func__, (void *)_RET_IP_, crng_init) /********************************************************************* @@ -216,7 +196,7 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void * * There are a few exported interfaces for use by other drivers: * - * void get_random_bytes(void *buf, size_t nbytes) + * void get_random_bytes(void *buf, size_t len) * u32 get_random_u32() * u64 get_random_u64() * unsigned int get_random_int() @@ -232,8 +212,8 @@ static void _warn_unseeded_randomness(const char *func_name, void *caller, void *********************************************************************/ enum { - CRNG_RESEED_INTERVAL = 300 * HZ, - CRNG_INIT_CNT_THRESH = 2 * CHACHA_KEY_SIZE + CRNG_RESEED_START_INTERVAL = HZ, + CRNG_RESEED_INTERVAL = 60 * HZ }; static struct { @@ -256,24 +236,17 @@ static DEFINE_PER_CPU(struct crng, crngs) = { .lock = INIT_LOCAL_LOCK(crngs.lock), }; -/* Used by crng_reseed() to extract a new seed from the input pool. */ -static bool drain_entropy(void *buf, size_t nbytes, bool force); +/* Used by crng_reseed() and crng_make_state() to extract a new seed from the input pool. 
*/ +static void extract_entropy(void *buf, size_t len); -/* - * This extracts a new crng key from the input pool, but only if there is a - * sufficient amount of entropy available or force is true, in order to - * mitigate bruteforcing of newly added bits. - */ -static void crng_reseed(bool force) +/* This extracts a new crng key from the input pool. */ +static void crng_reseed(void) { unsigned long flags; unsigned long next_gen; u8 key[CHACHA_KEY_SIZE]; - bool finalize_init = false; - /* Only reseed if we can, to prevent brute forcing a small amount of new bits. */ - if (!drain_entropy(key, sizeof(key), force)) - return; + extract_entropy(key, sizeof(key)); /* * We copy the new key into the base_crng, overwriting the old one, @@ -288,28 +261,10 @@ static void crng_reseed(bool force) ++next_gen; WRITE_ONCE(base_crng.generation, next_gen); WRITE_ONCE(base_crng.birth, jiffies); - if (!crng_ready()) { - crng_init = 2; - finalize_init = true; - } + if (!crng_ready()) + crng_init = CRNG_READY; spin_unlock_irqrestore(&base_crng.lock, flags); memzero_explicit(key, sizeof(key)); - if (finalize_init) { - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } - } } /* @@ -345,10 +300,10 @@ static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], } /* - * Return whether the crng seed is considered to be sufficiently - * old that a reseeding might be attempted. This happens if the last - * reseeding was CRNG_RESEED_INTERVAL ago, or during early boot, at - * an interval proportional to the uptime. + * Return whether the crng seed is considered to be sufficiently old + * that a reseeding is needed. This happens if the last reseeding + * was CRNG_RESEED_INTERVAL ago, or during early boot, at an interval + * proportional to the uptime. */ static bool crng_has_old_seed(void) { @@ -360,10 +315,10 @@ static bool crng_has_old_seed(void) if (uptime >= CRNG_RESEED_INTERVAL / HZ * 2) WRITE_ONCE(early_boot, false); else - interval = max_t(unsigned int, 5 * HZ, + interval = max_t(unsigned int, CRNG_RESEED_START_INTERVAL, (unsigned int)uptime / 2 * HZ); } - return time_after(jiffies, READ_ONCE(base_crng.birth) + interval); + return time_is_before_jiffies(READ_ONCE(base_crng.birth) + interval); } /* @@ -382,28 +337,31 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], /* * For the fast path, we check whether we're ready, unlocked first, and * then re-check once locked later. In the case where we're really not - * ready, we do fast key erasure with the base_crng directly, because - * this is what crng_pre_init_inject() mutates during early init. + * ready, we do fast key erasure with the base_crng directly, extracting + * when crng_init is CRNG_EMPTY. 
*/ if (!crng_ready()) { bool ready; spin_lock_irqsave(&base_crng.lock, flags); ready = crng_ready(); - if (!ready) + if (!ready) { + if (crng_init == CRNG_EMPTY) + extract_entropy(base_crng.key, sizeof(base_crng.key)); crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); + } spin_unlock_irqrestore(&base_crng.lock, flags); if (!ready) return; } /* - * If the base_crng is old enough, we try to reseed, which in turn - * bumps the generation counter that we check below. + * If the base_crng is old enough, we reseed, which in turn bumps the + * generation counter that we check below. */ if (unlikely(crng_has_old_seed())) - crng_reseed(false); + crng_reseed(); local_lock_irqsave(&crngs.lock, flags); crng = raw_cpu_ptr(&crngs); @@ -433,68 +391,24 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], local_unlock_irqrestore(&crngs.lock, flags); } -/* - * This function is for crng_init == 0 only. It loads entropy directly - * into the crng's key, without going through the input pool. It is, - * generally speaking, not very safe, but we use this only at early - * boot time when it's better to have something there rather than - * nothing. - * - * If account is set, then the crng_init_cnt counter is incremented. - * This shouldn't be set by functions like add_device_randomness(), - * where we can't trust the buffer passed to it is guaranteed to be - * unpredictable (so it might not have any entropy at all). - */ -static void crng_pre_init_inject(const void *input, size_t len, bool account) -{ - static int crng_init_cnt = 0; - struct blake2s_state hash; - unsigned long flags; - - blake2s_init(&hash, sizeof(base_crng.key)); - - spin_lock_irqsave(&base_crng.lock, flags); - if (crng_init != 0) { - spin_unlock_irqrestore(&base_crng.lock, flags); - return; - } - - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); - blake2s_update(&hash, input, len); - blake2s_final(&hash, base_crng.key); - - if (account) { - crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { - ++base_crng.generation; - crng_init = 1; - } - } - - spin_unlock_irqrestore(&base_crng.lock, flags); - - if (crng_init == 1) - pr_notice("fast init done\n"); -} - -static void _get_random_bytes(void *buf, size_t nbytes) +static void _get_random_bytes(void *buf, size_t len) { u32 chacha_state[CHACHA_STATE_WORDS]; u8 tmp[CHACHA_BLOCK_SIZE]; - size_t len; + size_t first_block_len; - if (!nbytes) + if (!len) return; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, buf, len); - nbytes -= len; - buf += len; + first_block_len = min_t(size_t, 32, len); + crng_make_state(chacha_state, buf, first_block_len); + len -= first_block_len; + buf += first_block_len; - while (nbytes) { - if (nbytes < CHACHA_BLOCK_SIZE) { + while (len) { + if (len < CHACHA_BLOCK_SIZE) { chacha20_block(chacha_state, tmp); - memcpy(buf, tmp, nbytes); + memcpy(buf, tmp, len); memzero_explicit(tmp, sizeof(tmp)); break; } @@ -502,7 +416,7 @@ static void _get_random_bytes(void *buf, size_t nbytes) chacha20_block(chacha_state, buf); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - nbytes -= CHACHA_BLOCK_SIZE; + len -= CHACHA_BLOCK_SIZE; buf += CHACHA_BLOCK_SIZE; } @@ -519,22 +433,20 @@ static void _get_random_bytes(void *buf, size_t nbytes) * wait_for_random_bytes() should be called and return 0 at least once * at any point prior. 
*/ -void get_random_bytes(void *buf, size_t nbytes) +void get_random_bytes(void *buf, size_t len) { - static void *previous; - - warn_unseeded_randomness(&previous); - _get_random_bytes(buf, nbytes); + warn_unseeded_randomness(); + _get_random_bytes(buf, len); } EXPORT_SYMBOL(get_random_bytes); -static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) +static ssize_t get_random_bytes_user(struct iov_iter *iter) { - size_t len, left, ret = 0; u32 chacha_state[CHACHA_STATE_WORDS]; - u8 output[CHACHA_BLOCK_SIZE]; + u8 block[CHACHA_BLOCK_SIZE]; + size_t ret = 0, copied; - if (!nbytes) + if (unlikely(!iov_iter_count(iter))) return 0; /* @@ -548,30 +460,22 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) * use chacha_state after, so we can simply return those bytes to * the user directly. */ - if (nbytes <= CHACHA_KEY_SIZE) { - ret = nbytes - copy_to_user(buf, &chacha_state[4], nbytes); + if (iov_iter_count(iter) <= CHACHA_KEY_SIZE) { + ret = copy_to_iter(&chacha_state[4], CHACHA_KEY_SIZE, iter); goto out_zero_chacha; } for (;;) { - chacha20_block(chacha_state, output); + chacha20_block(chacha_state, block); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; - len = min_t(size_t, nbytes, CHACHA_BLOCK_SIZE); - left = copy_to_user(buf, output, len); - if (left) { - ret += len - left; + copied = copy_to_iter(block, sizeof(block), iter); + ret += copied; + if (!iov_iter_count(iter) || copied != sizeof(block)) break; - } - buf += len; - ret += len; - nbytes -= len; - if (!nbytes) - break; - - BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); if (ret % PAGE_SIZE == 0) { if (signal_pending(current)) break; @@ -579,7 +483,7 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) } } - memzero_explicit(output, sizeof(output)); + memzero_explicit(block, sizeof(block)); out_zero_chacha: memzero_explicit(chacha_state, sizeof(chacha_state)); return ret ? ret : -EFAULT; @@ -591,98 +495,69 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) * provided by this function is okay, the function wait_for_random_bytes() * should be called and return 0 at least once at any point prior. */ -struct batched_entropy { - union { - /* - * We make this 1.5x a ChaCha block, so that we get the - * remaining 32 bytes from fast key erasure, plus one full - * block from the detached ChaCha state. We can increase - * the size of this later if needed so long as we keep the - * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. 
- */ - u64 entropy_u64[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u64))]; - u32 entropy_u32[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(u32))]; - }; - local_lock_t lock; - unsigned long generation; - unsigned int position; -}; - - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u64.lock), - .position = UINT_MAX -}; - -u64 get_random_u64(void) -{ - u64 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u64.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u64); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u64) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u64, sizeof(batch->entropy_u64)); - batch->position = 0; - batch->generation = next_gen; - } - - ret = batch->entropy_u64[batch->position]; - batch->entropy_u64[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u64.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u64); - -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32) = { - .lock = INIT_LOCAL_LOCK(batched_entropy_u32.lock), - .position = UINT_MAX -}; - -u32 get_random_u32(void) -{ - u32 ret; - unsigned long flags; - struct batched_entropy *batch; - static void *previous; - unsigned long next_gen; - - warn_unseeded_randomness(&previous); - - local_lock_irqsave(&batched_entropy_u32.lock, flags); - batch = raw_cpu_ptr(&batched_entropy_u32); - - next_gen = READ_ONCE(base_crng.generation); - if (batch->position >= ARRAY_SIZE(batch->entropy_u32) || - next_gen != batch->generation) { - _get_random_bytes(batch->entropy_u32, sizeof(batch->entropy_u32)); - batch->position = 0; - batch->generation = next_gen; - } - ret = batch->entropy_u32[batch->position]; - batch->entropy_u32[batch->position] = 0; - ++batch->position; - local_unlock_irqrestore(&batched_entropy_u32.lock, flags); - return ret; -} -EXPORT_SYMBOL(get_random_u32); +#define DEFINE_BATCHED_ENTROPY(type) \ +struct batch_ ##type { \ + /* \ + * We make this 1.5x a ChaCha block, so that we get the \ + * remaining 32 bytes from fast key erasure, plus one full \ + * block from the detached ChaCha state. We can increase \ + * the size of this later if needed so long as we keep the \ + * formula of (integer_blocks + 0.5) * CHACHA_BLOCK_SIZE. 
\ + */ \ + type entropy[CHACHA_BLOCK_SIZE * 3 / (2 * sizeof(type))]; \ + local_lock_t lock; \ + unsigned long generation; \ + unsigned int position; \ +}; \ + \ +static DEFINE_PER_CPU(struct batch_ ##type, batched_entropy_ ##type) = { \ + .lock = INIT_LOCAL_LOCK(batched_entropy_ ##type.lock), \ + .position = UINT_MAX \ +}; \ + \ +type get_random_ ##type(void) \ +{ \ + type ret; \ + unsigned long flags; \ + struct batch_ ##type *batch; \ + unsigned long next_gen; \ + \ + warn_unseeded_randomness(); \ + \ + if (!crng_ready()) { \ + _get_random_bytes(&ret, sizeof(ret)); \ + return ret; \ + } \ + \ + local_lock_irqsave(&batched_entropy_ ##type.lock, flags); \ + batch = raw_cpu_ptr(&batched_entropy_##type); \ + \ + next_gen = READ_ONCE(base_crng.generation); \ + if (batch->position >= ARRAY_SIZE(batch->entropy) || \ + next_gen != batch->generation) { \ + _get_random_bytes(batch->entropy, sizeof(batch->entropy)); \ + batch->position = 0; \ + batch->generation = next_gen; \ + } \ + \ + ret = batch->entropy[batch->position]; \ + batch->entropy[batch->position] = 0; \ + ++batch->position; \ + local_unlock_irqrestore(&batched_entropy_ ##type.lock, flags); \ + return ret; \ +} \ +EXPORT_SYMBOL(get_random_ ##type); + +DEFINE_BATCHED_ENTROPY(u64) +DEFINE_BATCHED_ENTROPY(u32) #ifdef CONFIG_SMP /* * This function is called when the CPU is coming up, with entry * CPUHP_RANDOM_PREPARE, which comes before CPUHP_WORKQUEUE_PREP. */ -int random_prepare_cpu(unsigned int cpu) +int __cold random_prepare_cpu(unsigned int cpu) { /* * When the cpu comes back online, immediately invalidate both @@ -696,62 +571,30 @@ int random_prepare_cpu(unsigned int cpu) } #endif -/** - * randomize_page - Generate a random, page aligned address - * @start: The smallest acceptable address the caller will take. - * @range: The size of the area, starting at @start, within which the - * random address must fall. - * - * If @start + @range would overflow, @range is capped. - * - * NOTE: Historical use of randomize_range, which this replaces, presumed that - * @start was already page aligned. We now align it regardless. - * - * Return: A page aligned address within [start, start + range). On error, - * @start is returned. - */ -unsigned long randomize_page(unsigned long start, unsigned long range) -{ - if (!PAGE_ALIGNED(start)) { - range -= PAGE_ALIGN(start) - start; - start = PAGE_ALIGN(start); - } - - if (start > ULONG_MAX - range) - range = ULONG_MAX - start; - - range >>= PAGE_SHIFT; - - if (range == 0) - return start; - - return start + (get_random_long() % range << PAGE_SHIFT); -} - /* * This function will use the architecture-specific hardware random * number generator if it is available. It is not recommended for * use. Use get_random_bytes() instead. It returns the number of * bytes filled in. 
*/ -size_t __must_check get_random_bytes_arch(void *buf, size_t nbytes) +size_t __must_check get_random_bytes_arch(void *buf, size_t len) { - size_t left = nbytes; + size_t left = len; u8 *p = buf; while (left) { unsigned long v; - size_t chunk = min_t(size_t, left, sizeof(unsigned long)); + size_t block_len = min_t(size_t, left, sizeof(unsigned long)); if (!arch_get_random_long(&v)) break; - memcpy(p, &v, chunk); - p += chunk; - left -= chunk; + memcpy(p, &v, block_len); + p += block_len; + left -= block_len; } - return nbytes - left; + return len - left; } EXPORT_SYMBOL(get_random_bytes_arch); @@ -762,33 +605,28 @@ EXPORT_SYMBOL(get_random_bytes_arch); * * Callers may add entropy via: * - * static void mix_pool_bytes(const void *in, size_t nbytes) + * static void mix_pool_bytes(const void *buf, size_t len) * * After which, if added entropy should be credited: * - * static void credit_entropy_bits(size_t nbits) + * static void credit_init_bits(size_t bits) * - * Finally, extract entropy via these two, with the latter one - * setting the entropy count to zero and extracting only if there - * is POOL_MIN_BITS entropy credited prior or force is true: + * Finally, extract entropy via: * - * static void extract_entropy(void *buf, size_t nbytes) - * static bool drain_entropy(void *buf, size_t nbytes, bool force) + * static void extract_entropy(void *buf, size_t len) * **********************************************************************/ enum { POOL_BITS = BLAKE2S_HASH_SIZE * 8, - POOL_MIN_BITS = POOL_BITS /* No point in settling for less. */ + POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */ + POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */ }; -/* For notifying userspace should write into /dev/random. */ -static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); - static struct { struct blake2s_state hash; spinlock_t lock; - unsigned int entropy_count; + unsigned int init_bits; } input_pool = { .hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE), BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4, @@ -797,48 +635,30 @@ static struct { .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), }; -static void _mix_pool_bytes(const void *in, size_t nbytes) +static void _mix_pool_bytes(const void *buf, size_t len) { - blake2s_update(&input_pool.hash, in, nbytes); + blake2s_update(&input_pool.hash, buf, len); } /* - * This function adds bytes into the entropy "pool". It does not - * update the entropy estimate. The caller should call - * credit_entropy_bits if this is appropriate. + * This function adds bytes into the input pool. It does not + * update the initialization bit counter; the caller should call + * credit_init_bits if this is appropriate. 
*/ -static void mix_pool_bytes(const void *in, size_t nbytes) +static void mix_pool_bytes(const void *buf, size_t len) { unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(in, nbytes); + _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } -static void credit_entropy_bits(size_t nbits) -{ - unsigned int entropy_count, orig, add; - - if (!nbits) - return; - - add = min_t(size_t, nbits, POOL_BITS); - - do { - orig = READ_ONCE(input_pool.entropy_count); - entropy_count = min_t(unsigned int, POOL_BITS, orig + add); - } while (cmpxchg(&input_pool.entropy_count, orig, entropy_count) != orig); - - if (!crng_ready() && entropy_count >= POOL_MIN_BITS) - crng_reseed(false); -} - /* * This is an HKDF-like construction for using the hashed collected entropy * as a PRF key, that's then expanded block-by-block. */ -static void extract_entropy(void *buf, size_t nbytes) +static void extract_entropy(void *buf, size_t len) { unsigned long flags; u8 seed[BLAKE2S_HASH_SIZE], next_key[BLAKE2S_HASH_SIZE]; @@ -867,12 +687,12 @@ static void extract_entropy(void *buf, size_t nbytes) spin_unlock_irqrestore(&input_pool.lock, flags); memzero_explicit(next_key, sizeof(next_key)); - while (nbytes) { - i = min_t(size_t, nbytes, BLAKE2S_HASH_SIZE); + while (len) { + i = min_t(size_t, len, BLAKE2S_HASH_SIZE); /* output = HASHPRF(seed, RDSEED || ++counter) */ ++block.counter; blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed)); - nbytes -= i; + len -= i; buf += i; } @@ -880,23 +700,41 @@ static void extract_entropy(void *buf, size_t nbytes) memzero_explicit(&block, sizeof(block)); } -/* - * First we make sure we have POOL_MIN_BITS of entropy in the pool unless force - * is true, and then we set the entropy count to zero (but don't actually touch - * any data). Only then can we extract a new key with extract_entropy(). - */ -static bool drain_entropy(void *buf, size_t nbytes, bool force) +#define credit_init_bits(bits) if (!crng_ready()) _credit_init_bits(bits) + +static void __cold _credit_init_bits(size_t bits) { - unsigned int entropy_count; + unsigned int new, orig, add; + unsigned long flags; + + if (!bits) + return; + + add = min_t(size_t, bits, POOL_BITS); + do { - entropy_count = READ_ONCE(input_pool.entropy_count); - if (!force && entropy_count < POOL_MIN_BITS) - return false; - } while (cmpxchg(&input_pool.entropy_count, entropy_count, 0) != entropy_count); - extract_entropy(buf, nbytes); - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - return true; + orig = READ_ONCE(input_pool.init_bits); + new = min_t(unsigned int, POOL_BITS, orig + add); + } while (cmpxchg(&input_pool.init_bits, orig, new) != orig); + + if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { + crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (urandom_warning.missed) + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + } else if (orig < POOL_EARLY_BITS && new >= POOL_EARLY_BITS) { + spin_lock_irqsave(&base_crng.lock, flags); + /* Check if crng_init is CRNG_EMPTY, to avoid race with crng_reseed(). 
*/ + if (crng_init == CRNG_EMPTY) { + extract_entropy(base_crng.key, sizeof(base_crng.key)); + crng_init = CRNG_EARLY; + } + spin_unlock_irqrestore(&base_crng.lock, flags); + } } @@ -907,15 +745,13 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force) * The following exported functions are used for pushing entropy into * the above entropy accumulation routines: * - * void add_device_randomness(const void *buf, size_t size); - * void add_input_randomness(unsigned int type, unsigned int code, - * unsigned int value); - * void add_disk_randomness(struct gendisk *disk); - * void add_hwgenerator_randomness(const void *buffer, size_t count, - * size_t entropy); - * void add_bootloader_randomness(const void *buf, size_t size); - * void add_vmfork_randomness(const void *unique_vm_id, size_t size); + * void add_device_randomness(const void *buf, size_t len); + * void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy); + * void add_bootloader_randomness(const void *buf, size_t len); + * void add_vmfork_randomness(const void *unique_vm_id, size_t len); * void add_interrupt_randomness(int irq); + * void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); + * void add_disk_randomness(struct gendisk *disk); * * add_device_randomness() adds data to the input pool that * is likely to differ between two devices (or possibly even per boot). @@ -925,26 +761,13 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force) * that might otherwise be identical and have very little entropy * available to them (particularly common in the embedded world). * - * add_input_randomness() uses the input layer interrupt timing, as well - * as the event type information from the hardware. - * - * add_disk_randomness() uses what amounts to the seek time of block - * layer request events, on a per-disk_devt basis, as input to the - * entropy pool. Note that high-speed solid state drives with very low - * seek times do not make for good sources of entropy, as their seek - * times are usually fairly consistent. - * - * The above two routines try to estimate how many bits of entropy - * to credit. They do this by keeping track of the first and second - * order deltas of the event timings. - * * add_hwgenerator_randomness() is for true hardware RNGs, and will credit * entropy as specified by the caller. If the entropy pool is full it will * block until more entropy is needed. * - * add_bootloader_randomness() is the same as add_hwgenerator_randomness() or - * add_device_randomness(), depending on whether or not the configuration - * option CONFIG_RANDOM_TRUST_BOOTLOADER is set. + * add_bootloader_randomness() is called by bootloader drivers, such as EFI + * and device tree, and credits its input depending on whether or not the + * configuration option CONFIG_RANDOM_TRUST_BOOTLOADER is set. * * add_vmfork_randomness() adds a unique (but not necessarily secret) ID * representing the current instance of a VM to the pool, without crediting, @@ -955,10 +778,23 @@ static bool drain_entropy(void *buf, size_t nbytes, bool force) * as inputs, it feeds the input pool roughly once a second or after 64 * interrupts, crediting 1 bit of entropy for whichever comes first. * + * add_input_randomness() uses the input layer interrupt timing, as well + * as the event type information from the hardware. + * + * add_disk_randomness() uses what amounts to the seek time of block + * layer request events, on a per-disk_devt basis, as input to the + * entropy pool. 
Note that high-speed solid state drives with very low + * seek times do not make for good sources of entropy, as their seek + * times are usually fairly consistent. + * + * The last two routines try to estimate how many bits of entropy + * to credit. They do this by keeping track of the first and second + * order deltas of the event timings. + * **********************************************************************/ -static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); -static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); +static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); +static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); static int __init parse_trust_cpu(char *arg) { return kstrtobool(arg, &trust_cpu); @@ -972,46 +808,42 @@ early_param("random.trust_bootloader", parse_trust_bootloader); /* * The first collection of entropy occurs at system boot while interrupts - * are still turned off. Here we push in RDSEED, a timestamp, and utsname(). - * Depending on the above configuration knob, RDSEED may be considered - * sufficient for initialization. Note that much earlier setup may already - * have pushed entropy into the input pool by the time we get here. + * are still turned off. Here we push in latent entropy, RDSEED, a timestamp, + * utsname(), and the command line. Depending on the above configuration knob, + * RDSEED may be considered sufficient for initialization. Note that much + * earlier setup may already have pushed entropy into the input pool by the + * time we get here. */ -int __init rand_initialize(void) +int __init random_init(const char *command_line) { - size_t i; ktime_t now = ktime_get_real(); - bool arch_init = true; - unsigned long rv; + unsigned int i, arch_bits; + unsigned long entropy; #if defined(LATENT_ENTROPY_PLUGIN) static const u8 compiletime_seed[BLAKE2S_BLOCK_SIZE] __initconst __latent_entropy; _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); #endif - for (i = 0; i < BLAKE2S_BLOCK_SIZE; i += sizeof(rv)) { - if (!arch_get_random_seed_long_early(&rv) && - !arch_get_random_long_early(&rv)) { - rv = random_get_entropy(); - arch_init = false; + for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8; + i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) { + if (!arch_get_random_seed_long_early(&entropy) && + !arch_get_random_long_early(&entropy)) { + entropy = random_get_entropy(); + arch_bits -= sizeof(entropy) * 8; } - _mix_pool_bytes(&rv, sizeof(rv)); + _mix_pool_bytes(&entropy, sizeof(entropy)); } _mix_pool_bytes(&now, sizeof(now)); _mix_pool_bytes(utsname(), sizeof(*(utsname()))); + _mix_pool_bytes(command_line, strlen(command_line)); + add_latent_entropy(); - extract_entropy(base_crng.key, sizeof(base_crng.key)); - ++base_crng.generation; - - if (arch_init && trust_cpu && !crng_ready()) { - crng_init = 2; - pr_notice("crng init done (trusting CPU's manufacturer)\n"); - } + if (crng_ready()) + crng_reseed(); + else if (trust_cpu) + _credit_init_bits(arch_bits); - if (ratelimit_disable) { - urandom_warning.interval = 0; - unseeded_warning.interval = 0; - } return 0; } @@ -1023,166 +855,47 @@ int __init rand_initialize(void) * the entropy pool having similar initial state across largely * identical devices. 
*/ -void add_device_randomness(const void *buf, size_t size) +void add_device_randomness(const void *buf, size_t len) { - unsigned long cycles = random_get_entropy(); - unsigned long flags, now = jiffies; - - if (crng_init == 0 && size) - crng_pre_init_inject(buf, size, false); + unsigned long entropy = random_get_entropy(); + unsigned long flags; spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&cycles, sizeof(cycles)); - _mix_pool_bytes(&now, sizeof(now)); - _mix_pool_bytes(buf, size); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(buf, len); spin_unlock_irqrestore(&input_pool.lock, flags); } EXPORT_SYMBOL(add_device_randomness); -/* There is one of these per entropy source */ -struct timer_rand_state { - unsigned long last_time; - long last_delta, last_delta2; -}; - -/* - * This function adds entropy to the entropy "pool" by using timing - * delays. It uses the timer_rand_state structure to make an estimate - * of how many bits of entropy this call has added to the pool. - * - * The number "num" is also added to the pool - it should somehow describe - * the type of event which just happened. This is currently 0-255 for - * keyboard scan codes, and 256 upwards for interrupts. - */ -static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) -{ - unsigned long cycles = random_get_entropy(), now = jiffies, flags; - long delta, delta2, delta3; - - spin_lock_irqsave(&input_pool.lock, flags); - _mix_pool_bytes(&cycles, sizeof(cycles)); - _mix_pool_bytes(&now, sizeof(now)); - _mix_pool_bytes(&num, sizeof(num)); - spin_unlock_irqrestore(&input_pool.lock, flags); - - /* - * Calculate number of bits of randomness we probably added. - * We take into account the first, second and third-order deltas - * in order to make our estimate. - */ - delta = now - READ_ONCE(state->last_time); - WRITE_ONCE(state->last_time, now); - - delta2 = delta - READ_ONCE(state->last_delta); - WRITE_ONCE(state->last_delta, delta); - - delta3 = delta2 - READ_ONCE(state->last_delta2); - WRITE_ONCE(state->last_delta2, delta2); - - if (delta < 0) - delta = -delta; - if (delta2 < 0) - delta2 = -delta2; - if (delta3 < 0) - delta3 = -delta3; - if (delta > delta2) - delta = delta2; - if (delta > delta3) - delta = delta3; - - /* - * delta is now minimum absolute delta. - * Round down by 1 bit on general principles, - * and limit entropy estimate to 12 bits. - */ - credit_entropy_bits(min_t(unsigned int, fls(delta >> 1), 11)); -} - -void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value) -{ - static unsigned char last_value; - static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; - - /* Ignore autorepeat and the like. */ - if (value == last_value) - return; - - last_value = value; - add_timer_randomness(&input_timer_state, - (type << 4) ^ code ^ (code >> 4) ^ value); -} -EXPORT_SYMBOL_GPL(add_input_randomness); - -#ifdef CONFIG_BLOCK -void add_disk_randomness(struct gendisk *disk) -{ - if (!disk || !disk->random) - return; - /* First major is 1, so we get >= 0x200 here. */ - add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); -} -EXPORT_SYMBOL_GPL(add_disk_randomness); - -void rand_initialize_disk(struct gendisk *disk) -{ - struct timer_rand_state *state; - - /* - * If kzalloc returns null, we just won't use that entropy - * source. 
- */ - state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); - if (state) { - state->last_time = INITIAL_JIFFIES; - disk->random = state; - } -} -#endif - /* * Interface for in-kernel drivers of true hardware RNGs. * Those devices may produce endless random bits and will be throttled * when our pool is full. */ -void add_hwgenerator_randomness(const void *buffer, size_t count, - size_t entropy) +void add_hwgenerator_randomness(const void *buf, size_t len, size_t entropy) { - if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) { - crng_pre_init_inject(buffer, count, true); - mix_pool_bytes(buffer, count); - return; - } + mix_pool_bytes(buf, len); + credit_init_bits(entropy); /* - * Throttle writing if we're above the trickle threshold. - * We'll be woken up again once below POOL_MIN_BITS, when - * the calling thread is about to terminate, or once - * CRNG_RESEED_INTERVAL has elapsed. + * Throttle writing to once every CRNG_RESEED_INTERVAL, unless + * we're not yet initialized. */ - wait_event_interruptible_timeout(random_write_wait, - !system_wq || kthread_should_stop() || - input_pool.entropy_count < POOL_MIN_BITS, - CRNG_RESEED_INTERVAL); - mix_pool_bytes(buffer, count); - credit_entropy_bits(entropy); + if (!kthread_should_stop() && crng_ready()) + schedule_timeout_interruptible(CRNG_RESEED_INTERVAL); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); /* - * Handle random seed passed by bootloader. - * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise - * it would be regarded as device data. - * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + * Handle random seed passed by bootloader, and credit it if + * CONFIG_RANDOM_TRUST_BOOTLOADER is set. */ -void add_bootloader_randomness(const void *buf, size_t size) +void __init add_bootloader_randomness(const void *buf, size_t len) { + mix_pool_bytes(buf, len); if (trust_bootloader) - add_hwgenerator_randomness(buf, size, size * 8); - else - add_device_randomness(buf, size); + credit_init_bits(len * 8); } -EXPORT_SYMBOL_GPL(add_bootloader_randomness); #if IS_ENABLED(CONFIG_VMGENID) static BLOCKING_NOTIFIER_HEAD(vmfork_chain); @@ -1192,11 +905,11 @@ static BLOCKING_NOTIFIER_HEAD(vmfork_chain); * don't credit it, but we do immediately force a reseed after so * that it's used by the crng posthaste. 
*/ -void add_vmfork_randomness(const void *unique_vm_id, size_t size) +void __cold add_vmfork_randomness(const void *unique_vm_id, size_t len) { - add_device_randomness(unique_vm_id, size); + add_device_randomness(unique_vm_id, len); if (crng_ready()) { - crng_reseed(true); + crng_reseed(); pr_notice("crng reseeded due to virtual machine fork\n"); } blocking_notifier_call_chain(&vmfork_chain, 0, NULL); @@ -1205,13 +918,13 @@ void add_vmfork_randomness(const void *unique_vm_id, size_t size) EXPORT_SYMBOL_GPL(add_vmfork_randomness); #endif -int register_random_vmfork_notifier(struct notifier_block *nb) +int __cold register_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&vmfork_chain, nb); } EXPORT_SYMBOL_GPL(register_random_vmfork_notifier); -int unregister_random_vmfork_notifier(struct notifier_block *nb) +int __cold unregister_random_vmfork_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&vmfork_chain, nb); } @@ -1223,17 +936,15 @@ struct fast_pool { unsigned long pool[4]; unsigned long last; unsigned int count; - u16 reg_idx; }; static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { #ifdef CONFIG_64BIT - /* SipHash constants */ - .pool = { 0x736f6d6570736575UL, 0x646f72616e646f6dUL, - 0x6c7967656e657261UL, 0x7465646279746573UL } +#define FASTMIX_PERM SIPHASH_PERMUTATION + .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 } #else - /* HalfSipHash constants */ - .pool = { 0, 0, 0x6c796765U, 0x74656462U } +#define FASTMIX_PERM HSIPHASH_PERMUTATION + .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 } #endif }; @@ -1241,27 +952,16 @@ static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = { * This is [Half]SipHash-1-x, starting from an empty key. Because * the key is fixed, it assumes that its inputs are non-malicious, * and therefore this has no security on its own. s represents the - * 128 or 256-bit SipHash state, while v represents a 128-bit input. + * four-word SipHash state, while v represents a two-word input. */ -static void fast_mix(unsigned long s[4], const unsigned long *v) +static void fast_mix(unsigned long s[4], unsigned long v1, unsigned long v2) { - size_t i; - - for (i = 0; i < 16 / sizeof(long); ++i) { - s[3] ^= v[i]; -#ifdef CONFIG_64BIT - s[0] += s[1]; s[1] = rol64(s[1], 13); s[1] ^= s[0]; s[0] = rol64(s[0], 32); - s[2] += s[3]; s[3] = rol64(s[3], 16); s[3] ^= s[2]; - s[0] += s[3]; s[3] = rol64(s[3], 21); s[3] ^= s[0]; - s[2] += s[1]; s[1] = rol64(s[1], 17); s[1] ^= s[2]; s[2] = rol64(s[2], 32); -#else - s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); - s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; - s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; - s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); -#endif - s[0] ^= v[i]; - } + s[3] ^= v1; + FASTMIX_PERM(s[0], s[1], s[2], s[3]); + s[0] ^= v1; + s[3] ^= v2; + FASTMIX_PERM(s[0], s[1], s[2], s[3]); + s[0] ^= v2; } #ifdef CONFIG_SMP @@ -1269,7 +969,7 @@ static void fast_mix(unsigned long s[4], const unsigned long *v) * This function is called when the CPU has just come online, with * entry CPUHP_AP_RANDOM_ONLINE, just after CPUHP_AP_WORKQUEUE_ONLINE. 
*/ -int random_online_cpu(unsigned int cpu) +int __cold random_online_cpu(unsigned int cpu) { /* * During CPU shutdown and before CPU onlining, add_interrupt_ @@ -1287,33 +987,18 @@ int random_online_cpu(unsigned int cpu) } #endif -static unsigned long get_reg(struct fast_pool *f, struct pt_regs *regs) -{ - unsigned long *ptr = (unsigned long *)regs; - unsigned int idx; - - if (regs == NULL) - return 0; - idx = READ_ONCE(f->reg_idx); - if (idx >= sizeof(struct pt_regs) / sizeof(unsigned long)) - idx = 0; - ptr += idx++; - WRITE_ONCE(f->reg_idx, idx); - return *ptr; -} - static void mix_interrupt_randomness(struct work_struct *work) { struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix); /* - * The size of the copied stack pool is explicitly 16 bytes so that we - * tax mix_pool_byte()'s compression function the same amount on all - * platforms. This means on 64-bit we copy half the pool into this, - * while on 32-bit we copy all of it. The entropy is supposed to be - * sufficiently dispersed between bits that in the sponge-like - * half case, on average we don't wind up "losing" some. + * The size of the copied stack pool is explicitly 2 longs so that we + * only ever ingest half of the siphash output each time, retaining + * the other half as the next "key" that carries over. The entropy is + * supposed to be sufficiently dispersed between bits so on average + * we don't wind up "losing" some. */ - u8 pool[16]; + unsigned long pool[2]; + unsigned int count; /* Check to see if we're running on the wrong CPU due to hotplug. */ local_irq_disable(); @@ -1327,17 +1012,13 @@ static void mix_interrupt_randomness(struct work_struct *work) * consistent view, before we reenable irqs again. */ memcpy(pool, fast_pool->pool, sizeof(pool)); + count = fast_pool->count; fast_pool->count = 0; fast_pool->last = jiffies; local_irq_enable(); - if (unlikely(crng_init == 0)) { - crng_pre_init_inject(pool, sizeof(pool), true); - mix_pool_bytes(pool, sizeof(pool)); - } else { - mix_pool_bytes(pool, sizeof(pool)); - credit_entropy_bits(1); - } + mix_pool_bytes(pool, sizeof(pool)); + credit_init_bits(max(1u, (count & U16_MAX) / 64)); memzero_explicit(pool, sizeof(pool)); } @@ -1345,37 +1026,19 @@ static void mix_interrupt_randomness(struct work_struct *work) void add_interrupt_randomness(int irq) { enum { MIX_INFLIGHT = 1U << 31 }; - unsigned long cycles = random_get_entropy(), now = jiffies; + unsigned long entropy = random_get_entropy(); struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); struct pt_regs *regs = get_irq_regs(); unsigned int new_count; - union { - u32 u32[4]; - u64 u64[2]; - unsigned long longs[16 / sizeof(long)]; - } irq_data; - - if (cycles == 0) - cycles = get_reg(fast_pool, regs); - - if (sizeof(unsigned long) == 8) { - irq_data.u64[0] = cycles ^ rol64(now, 32) ^ irq; - irq_data.u64[1] = regs ? instruction_pointer(regs) : _RET_IP_; - } else { - irq_data.u32[0] = cycles ^ irq; - irq_data.u32[1] = now; - irq_data.u32[2] = regs ? instruction_pointer(regs) : _RET_IP_; - irq_data.u32[3] = get_reg(fast_pool, regs); - } - fast_mix(fast_pool->pool, irq_data.longs); + fast_mix(fast_pool->pool, entropy, + (regs ? 
instruction_pointer(regs) : _RET_IP_) ^ swab(irq)); new_count = ++fast_pool->count; if (new_count & MIX_INFLIGHT) return; - if (new_count < 64 && (!time_after(now, fast_pool->last + HZ) || - unlikely(crng_init == 0))) + if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ)) return; if (unlikely(!fast_pool->mix.func)) @@ -1385,6 +1048,126 @@ void add_interrupt_randomness(int irq) } EXPORT_SYMBOL_GPL(add_interrupt_randomness); +/* There is one of these per entropy source */ +struct timer_rand_state { + unsigned long last_time; + long last_delta, last_delta2; +}; + +/* + * This function adds entropy to the entropy "pool" by using timing + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. The + * value "num" is also added to the pool; it should somehow describe + * the type of event that just happened. + */ +static void add_timer_randomness(struct timer_rand_state *state, unsigned int num) +{ + unsigned long entropy = random_get_entropy(), now = jiffies, flags; + long delta, delta2, delta3; + unsigned int bits; + + /* + * If we're in a hard IRQ, add_interrupt_randomness() will be called + * sometime after, so mix into the fast pool. + */ + if (in_hardirq()) { + fast_mix(this_cpu_ptr(&irq_randomness)->pool, entropy, num); + } else { + spin_lock_irqsave(&input_pool.lock, flags); + _mix_pool_bytes(&entropy, sizeof(entropy)); + _mix_pool_bytes(&num, sizeof(num)); + spin_unlock_irqrestore(&input_pool.lock, flags); + } + + if (crng_ready()) + return; + + /* + * Calculate number of bits of randomness we probably added. + * We take into account the first, second and third-order deltas + * in order to make our estimate. + */ + delta = now - READ_ONCE(state->last_time); + WRITE_ONCE(state->last_time, now); + + delta2 = delta - READ_ONCE(state->last_delta); + WRITE_ONCE(state->last_delta, delta); + + delta3 = delta2 - READ_ONCE(state->last_delta2); + WRITE_ONCE(state->last_delta2, delta2); + + if (delta < 0) + delta = -delta; + if (delta2 < 0) + delta2 = -delta2; + if (delta3 < 0) + delta3 = -delta3; + if (delta > delta2) + delta = delta2; + if (delta > delta3) + delta = delta3; + + /* + * delta is now minimum absolute delta. Round down by 1 bit + * on general principles, and limit entropy estimate to 11 bits. + */ + bits = min(fls(delta >> 1), 11); + + /* + * As mentioned above, if we're in a hard IRQ, add_interrupt_randomness() + * will run after this, which uses a different crediting scheme of 1 bit + * per every 64 interrupts. In order to let that function do accounting + * close to the one in this function, we credit a full 64/64 bit per bit, + * and then subtract one to account for the extra one added. + */ + if (in_hardirq()) + this_cpu_ptr(&irq_randomness)->count += max(1u, bits * 64) - 1; + else + _credit_init_bits(bits); +} + +void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) +{ + static unsigned char last_value; + static struct timer_rand_state input_timer_state = { INITIAL_JIFFIES }; + + /* Ignore autorepeat and the like. */ + if (value == last_value) + return; + + last_value = value; + add_timer_randomness(&input_timer_state, + (type << 4) ^ code ^ (code >> 4) ^ value); +} +EXPORT_SYMBOL_GPL(add_input_randomness); + +#ifdef CONFIG_BLOCK +void add_disk_randomness(struct gendisk *disk) +{ + if (!disk || !disk->random) + return; + /* First major is 1, so we get >= 0x200 here. 
*/ + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); +} +EXPORT_SYMBOL_GPL(add_disk_randomness); + +void __cold rand_initialize_disk(struct gendisk *disk) +{ + struct timer_rand_state *state; + + /* + * If kzalloc returns null, we just won't use that entropy + * source. + */ + state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL); + if (state) { + state->last_time = INITIAL_JIFFIES; + disk->random = state; + } +} +#endif + /* * Each time the timer fires, we expect that we got an unpredictable * jump in the cycle counter. Even if the timer is running on another @@ -1398,40 +1181,40 @@ EXPORT_SYMBOL_GPL(add_interrupt_randomness); * * So the re-arming always happens in the entropy loop itself. */ -static void entropy_timer(struct timer_list *t) +static void __cold entropy_timer(struct timer_list *t) { - credit_entropy_bits(1); + credit_init_bits(1); } /* * If we have an actual cycle counter, see if we can * generate enough entropy with timing noise */ -static void try_to_generate_entropy(void) +static void __cold try_to_generate_entropy(void) { struct { - unsigned long cycles; + unsigned long entropy; struct timer_list timer; } stack; - stack.cycles = random_get_entropy(); + stack.entropy = random_get_entropy(); /* Slow counter - or none. Don't even bother */ - if (stack.cycles == random_get_entropy()) + if (stack.entropy == random_get_entropy()) return; timer_setup_on_stack(&stack.timer, entropy_timer, 0); while (!crng_ready() && !signal_pending(current)) { if (!timer_pending(&stack.timer)) mod_timer(&stack.timer, jiffies + 1); - mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); schedule(); - stack.cycles = random_get_entropy(); + stack.entropy = random_get_entropy(); } del_timer_sync(&stack.timer); destroy_timer_on_stack(&stack.timer); - mix_pool_bytes(&stack.cycles, sizeof(stack.cycles)); + mix_pool_bytes(&stack.entropy, sizeof(stack.entropy)); } @@ -1463,9 +1246,12 @@ static void try_to_generate_entropy(void) * **********************************************************************/ -SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, - flags) +SYSCALL_DEFINE3(getrandom, char __user *, ubuf, size_t, len, unsigned int, flags) { + struct iov_iter iter; + struct iovec iov; + int ret; + if (flags & ~(GRND_NONBLOCK | GRND_RANDOM | GRND_INSECURE)) return -EINVAL; @@ -1476,72 +1262,60 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, if ((flags & (GRND_INSECURE | GRND_RANDOM)) == (GRND_INSECURE | GRND_RANDOM)) return -EINVAL; - if (count > INT_MAX) - count = INT_MAX; - - if (!(flags & GRND_INSECURE) && !crng_ready()) { - int ret; - + if (!crng_ready() && !(flags & GRND_INSECURE)) { if (flags & GRND_NONBLOCK) return -EAGAIN; ret = wait_for_random_bytes(); if (unlikely(ret)) return ret; } - return get_random_bytes_user(buf, count); + + ret = import_single_range(READ, ubuf, len, &iov, &iter); + if (unlikely(ret)) + return ret; + return get_random_bytes_user(&iter); } static __poll_t random_poll(struct file *file, poll_table *wait) { - __poll_t mask; - poll_wait(file, &crng_init_wait, wait); - poll_wait(file, &random_write_wait, wait); - mask = 0; - if (crng_ready()) - mask |= EPOLLIN | EPOLLRDNORM; - if (input_pool.entropy_count < POOL_MIN_BITS) - mask |= EPOLLOUT | EPOLLWRNORM; - return mask; + return crng_ready() ? 
EPOLLIN | EPOLLRDNORM : EPOLLOUT | EPOLLWRNORM; } -static int write_pool(const char __user *ubuf, size_t count) +static ssize_t write_pool_user(struct iov_iter *iter) { - size_t len; - int ret = 0; u8 block[BLAKE2S_BLOCK_SIZE]; + ssize_t ret = 0; + size_t copied; - while (count) { - len = min(count, sizeof(block)); - if (copy_from_user(block, ubuf, len)) { - ret = -EFAULT; - goto out; + if (unlikely(!iov_iter_count(iter))) + return 0; + + for (;;) { + copied = copy_from_iter(block, sizeof(block), iter); + ret += copied; + mix_pool_bytes(block, copied); + if (!iov_iter_count(iter) || copied != sizeof(block)) + break; + + BUILD_BUG_ON(PAGE_SIZE % sizeof(block) != 0); + if (ret % PAGE_SIZE == 0) { + if (signal_pending(current)) + break; + cond_resched(); } - count -= len; - ubuf += len; - mix_pool_bytes(block, len); - cond_resched(); } -out: memzero_explicit(block, sizeof(block)); - return ret; + return ret ? ret : -EFAULT; } -static ssize_t random_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t random_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { - int ret; - - ret = write_pool(buffer, count); - if (ret) - return ret; - - return (ssize_t)count; + return write_pool_user(iter); } -static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t urandom_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { static int maxwarn = 10; @@ -1552,37 +1326,38 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, if (!crng_ready()) try_to_generate_entropy(); - if (!crng_ready() && maxwarn > 0) { - maxwarn--; - if (__ratelimit(&urandom_warning)) - pr_notice("%s: uninitialized urandom read (%zd bytes read)\n", - current->comm, nbytes); + if (!crng_ready()) { + if (!ratelimit_disable && maxwarn <= 0) + ++urandom_warning.missed; + else if (ratelimit_disable || __ratelimit(&urandom_warning)) { + --maxwarn; + pr_notice("%s: uninitialized urandom read (%zu bytes read)\n", + current->comm, iov_iter_count(iter)); + } } - return get_random_bytes_user(buf, nbytes); + return get_random_bytes_user(iter); } -static ssize_t random_read(struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos) +static ssize_t random_read_iter(struct kiocb *kiocb, struct iov_iter *iter) { int ret; ret = wait_for_random_bytes(); if (ret != 0) return ret; - return get_random_bytes_user(buf, nbytes); + return get_random_bytes_user(iter); } static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { - int size, ent_count; int __user *p = (int __user *)arg; - int retval; + int ent_count; switch (cmd) { case RNDGETENTCNT: /* Inherently racy, no point locking. 
*/ - if (put_user(input_pool.entropy_count, p)) + if (put_user(input_pool.init_bits, p)) return -EFAULT; return 0; case RNDADDTOENTCNT: @@ -1592,41 +1367,46 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EFAULT; if (ent_count < 0) return -EINVAL; - credit_entropy_bits(ent_count); + credit_init_bits(ent_count); return 0; - case RNDADDENTROPY: + case RNDADDENTROPY: { + struct iov_iter iter; + struct iovec iov; + ssize_t ret; + int len; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ent_count, p++)) return -EFAULT; if (ent_count < 0) return -EINVAL; - if (get_user(size, p++)) + if (get_user(len, p++)) return -EFAULT; - retval = write_pool((const char __user *)p, size); - if (retval < 0) - return retval; - credit_entropy_bits(ent_count); + ret = import_single_range(WRITE, p, len, &iov, &iter); + if (unlikely(ret)) + return ret; + ret = write_pool_user(&iter); + if (unlikely(ret < 0)) + return ret; + /* Since we're crediting, enforce that it was all written into the pool. */ + if (unlikely(ret != len)) + return -EFAULT; + credit_init_bits(ent_count); return 0; + } case RNDZAPENTCNT: case RNDCLEARPOOL: - /* - * Clear the entropy pool counters. We no longer clear - * the entropy pool, as that's silly. - */ + /* No longer has any effect. */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (xchg(&input_pool.entropy_count, 0) >= POOL_MIN_BITS) { - wake_up_interruptible(&random_write_wait); - kill_fasync(&fasync, SIGIO, POLL_OUT); - } return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!crng_ready()) return -ENODATA; - crng_reseed(false); + crng_reseed(); return 0; default: return -EINVAL; @@ -1639,22 +1419,26 @@ static int random_fasync(int fd, struct file *filp, int on) } const struct file_operations random_fops = { - .read = random_read, - .write = random_write, + .read_iter = random_read_iter, + .write_iter = random_write_iter, .poll = random_poll, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; const struct file_operations urandom_fops = { - .read = urandom_read, - .write = random_write, + .read_iter = urandom_read_iter, + .write_iter = random_write_iter, .unlocked_ioctl = random_ioctl, .compat_ioctl = compat_ptr_ioctl, .fasync = random_fasync, .llseek = noop_llseek, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, }; @@ -1678,7 +1462,7 @@ const struct file_operations urandom_fops = { * * - write_wakeup_threshold - the amount of entropy in the input pool * below which write polls to /dev/random will unblock, requesting - * more entropy, tied to the POOL_MIN_BITS constant. It is writable + * more entropy, tied to the POOL_READY_BITS constant. It is writable * to avoid breaking old userspaces, but writing to it does not * change any behavior of the RNG. * @@ -1693,7 +1477,7 @@ const struct file_operations urandom_fops = { #include static int sysctl_random_min_urandom_seed = CRNG_RESEED_INTERVAL / HZ; -static int sysctl_random_write_wakeup_bits = POOL_MIN_BITS; +static int sysctl_random_write_wakeup_bits = POOL_READY_BITS; static int sysctl_poolsize = POOL_BITS; static u8 sysctl_bootid[UUID_SIZE]; @@ -1702,7 +1486,7 @@ static u8 sysctl_bootid[UUID_SIZE]; * UUID. The difference is in whether table->data is NULL; if it is, * then a new UUID is generated and returned to the user. 
*/ -static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, +static int proc_do_uuid(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { u8 tmp_uuid[UUID_SIZE], *uuid; @@ -1729,14 +1513,14 @@ static int proc_do_uuid(struct ctl_table *table, int write, void *buffer, } snprintf(uuid_string, sizeof(uuid_string), "%pU", uuid); - return proc_dostring(&fake_table, 0, buffer, lenp, ppos); + return proc_dostring(&fake_table, 0, buf, lenp, ppos); } /* The same as proc_dointvec, but writes don't change anything. */ -static int proc_do_rointvec(struct ctl_table *table, int write, void *buffer, +static int proc_do_rointvec(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos) { - return write ? 0 : proc_dointvec(table, 0, buffer, lenp, ppos); + return write ? 0 : proc_dointvec(table, 0, buf, lenp, ppos); } static struct ctl_table random_table[] = { @@ -1749,7 +1533,7 @@ static struct ctl_table random_table[] = { }, { .procname = "entropy_avail", - .data = &input_pool.entropy_count, + .data = &input_pool.init_bits, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, @@ -1783,8 +1567,8 @@ static struct ctl_table random_table[] = { }; /* - * rand_initialize() is called before sysctl_init(), - * so we cannot call register_sysctl_init() in rand_initialize() + * random_init() is called before sysctl_init(), + * so we cannot call register_sysctl_init() in random_init() */ static int __init random_sysctls_init(void) { diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 4704fa553098b..04a3e23a4afc7 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -400,7 +400,16 @@ ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, if (!rc) { out = (struct tpm2_get_cap_out *) &buf.data[TPM_HEADER_SIZE]; - *value = be32_to_cpu(out->value); + /* + * To prevent failing boot up of some systems, Infineon TPM2.0 + * returns SUCCESS on TPM2_Startup in field upgrade mode. Also + * the TPM2_Getcapability command returns a zero length list + * in field upgrade mode. 
+ */ + if (be32_to_cpu(out->property_cnt) > 0) + *value = be32_to_cpu(out->value); + else + rc = -ENODATA; } tpm_buf_destroy(&buf); return rc; diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 3af4c07a9342f..d3989b257f422 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -681,6 +681,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, if (!wait_event_timeout(ibmvtpm->crq_queue.wq, ibmvtpm->rtce_buf != NULL, HZ)) { + rc = -ENODEV; dev_err(dev, "CRQ response timed out\n"); goto init_irq_cleanup; } diff --git a/drivers/char/tpm/tpm_tis_i2c_cr50.c b/drivers/char/tpm/tpm_tis_i2c_cr50.c index f6c0affbb4567..bf608b6af3395 100644 --- a/drivers/char/tpm/tpm_tis_i2c_cr50.c +++ b/drivers/char/tpm/tpm_tis_i2c_cr50.c @@ -768,8 +768,8 @@ static int tpm_cr50_i2c_remove(struct i2c_client *client) struct device *dev = &client->dev; if (!chip) { - dev_err(dev, "Could not get client data at remove\n"); - return -ENODEV; + dev_crit(dev, "Could not get client data at remove, memory corruption ahead\n"); + return 0; } tpm_chip_unregister(chip); diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index dc3551796e5ed..39bcbfd908b46 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -549,6 +549,7 @@ static void cleanup_dev(struct kref *kref) if (xdev->workq) destroy_workqueue(xdev->workq); + usb_put_dev(xdev->udev); kfree(xdev->channels); /* Argument may be NULL, and that's fine */ kfree(xdev); } diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c index 6144447f86c63..62238dca9a534 100644 --- a/drivers/clk/tegra/clk-dfll.c +++ b/drivers/clk/tegra/clk-dfll.c @@ -271,6 +271,7 @@ struct tegra_dfll { struct clk *ref_clk; struct clk *i2c_clk; struct clk *dfll_clk; + struct reset_control *dfll_rst; struct reset_control *dvco_rst; unsigned long ref_rate; unsigned long i2c_clk_rate; @@ -1464,6 +1465,7 @@ static int dfll_init(struct tegra_dfll *td) return -EINVAL; } + reset_control_deassert(td->dfll_rst); reset_control_deassert(td->dvco_rst); ret = clk_prepare(td->ref_clk); @@ -1509,6 +1511,7 @@ static int dfll_init(struct tegra_dfll *td) clk_unprepare(td->ref_clk); reset_control_assert(td->dvco_rst); + reset_control_assert(td->dfll_rst); return ret; } @@ -1530,6 +1533,7 @@ int tegra_dfll_suspend(struct device *dev) } reset_control_assert(td->dvco_rst); + reset_control_assert(td->dfll_rst); return 0; } @@ -1548,6 +1552,7 @@ int tegra_dfll_resume(struct device *dev) { struct tegra_dfll *td = dev_get_drvdata(dev); + reset_control_deassert(td->dfll_rst); reset_control_deassert(td->dvco_rst); pm_runtime_get_sync(td->dev); @@ -1951,6 +1956,12 @@ int tegra_dfll_register(struct platform_device *pdev, td->soc = soc; + td->dfll_rst = devm_reset_control_get_optional(td->dev, "dfll"); + if (IS_ERR(td->dfll_rst)) { + dev_err(td->dev, "couldn't get dfll reset\n"); + return PTR_ERR(td->dfll_rst); + } + td->dvco_rst = devm_reset_control_get(td->dev, "dvco"); if (IS_ERR(td->dvco_rst)) { dev_err(td->dev, "couldn't get dvco reset\n"); @@ -2087,6 +2098,7 @@ struct tegra_dfll_soc_data *tegra_dfll_unregister(struct platform_device *pdev) clk_unprepare(td->i2c_clk); reset_control_assert(td->dvco_rst); + reset_control_assert(td->dfll_rst); return td->soc; } diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c index 56c0cc32d0ac6..d514b44e67dd1 100644 --- a/drivers/clocksource/timer-oxnas-rps.c +++ b/drivers/clocksource/timer-oxnas-rps.c @@ -236,7 
+236,7 @@ static int __init oxnas_rps_timer_init(struct device_node *np) } rps->irq = irq_of_parse_and_map(np, 0); - if (rps->irq < 0) { + if (!rps->irq) { ret = -EINVAL; goto err_iomap; } diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 1767f8bf20133..593d5a957b69d 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -34,7 +34,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT, + .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/clocksource/timer-sp804.c b/drivers/clocksource/timer-sp804.c index 401d592e85f5a..e6a87f4af2b50 100644 --- a/drivers/clocksource/timer-sp804.c +++ b/drivers/clocksource/timer-sp804.c @@ -259,6 +259,11 @@ static int __init sp804_of_init(struct device_node *np, struct sp804_timer *time struct clk *clk1, *clk2; const char *name = of_get_property(np, "compatible", NULL); + if (initialized) { + pr_debug("%pOF: skipping further SP804 timer device\n", np); + return 0; + } + base = of_iomap(np, 0); if (!base) return -ENXIO; @@ -270,11 +275,6 @@ static int __init sp804_of_init(struct device_node *np, struct sp804_timer *time writel(0, timer1_base + timer->ctrl); writel(0, timer2_base + timer->ctrl); - if (initialized || !of_device_is_available(np)) { - ret = -EINVAL; - goto err; - } - clk1 = of_clk_get(np, 0); if (IS_ERR(clk1)) clk1 = NULL; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 80f535cc8a757..fbaa8e6c7d232 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -28,6 +28,7 @@ #include #include #include +#include #include static LIST_HEAD(cpufreq_policy_list); @@ -1707,6 +1708,16 @@ static unsigned int cpufreq_verify_current_freq(struct cpufreq_policy *policy, b return new_freq; if (policy->cur != new_freq) { + /* + * For some platforms, the frequency returned by hardware may be + * slightly different from what is provided in the frequency + * table, for example hardware may return 499 MHz instead of 500 + * MHz. In such cases it is better to avoid getting into + * unnecessary frequency updates. 
+ */ + if (abs(policy->cur - new_freq) < HZ_PER_MHZ) + return policy->cur; + cpufreq_out_of_sync(policy, new_freq); if (update) schedule_work(&policy->update); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 0d42cf8b88d8a..85da677c43d6b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -388,6 +388,15 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, gov->free(policy_dbs); } +static void cpufreq_dbs_data_release(struct kobject *kobj) +{ + struct dbs_data *dbs_data = to_dbs_data(to_gov_attr_set(kobj)); + struct dbs_governor *gov = dbs_data->gov; + + gov->exit(dbs_data); + kfree(dbs_data); +} + int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); @@ -425,6 +434,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) goto free_policy_dbs_info; } + dbs_data->gov = gov; gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); ret = gov->init(dbs_data); @@ -447,6 +457,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) policy->governor_data = policy_dbs; gov->kobj_type.sysfs_ops = &governor_sysfs_ops; + gov->kobj_type.release = cpufreq_dbs_data_release; ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type, get_governor_parent_kobj(policy), "%s", gov->gov.name); @@ -488,13 +499,8 @@ void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) policy->governor_data = NULL; - if (!count) { - if (!have_governor_per_policy()) - gov->gdbs_data = NULL; - - gov->exit(dbs_data); - kfree(dbs_data); - } + if (!count && !have_governor_per_policy()) + gov->gdbs_data = NULL; free_policy_dbs_info(policy_dbs, gov); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index a5a0bc3cc23ec..168c23fd7fcac 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -37,6 +37,7 @@ enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; /* Governor demand based switching data (per-policy or global). */ struct dbs_data { struct gov_attr_set attr_set; + struct dbs_governor *gov; void *tuners; unsigned int ignore_nice_load; unsigned int sampling_rate; diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 866163883b48d..bfe240c726e34 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -44,6 +44,8 @@ struct mtk_cpu_dvfs_info { bool need_voltage_tracking; }; +static struct platform_device *cpufreq_pdev; + static LIST_HEAD(dvfs_info_list); static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu) @@ -547,7 +549,6 @@ static int __init mtk_cpufreq_driver_init(void) { struct device_node *np; const struct of_device_id *match; - struct platform_device *pdev; int err; np = of_find_node_by_path("/"); @@ -571,16 +572,23 @@ static int __init mtk_cpufreq_driver_init(void) * and the device registration codes are put here to handle defer * probing. 
*/ - pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); - if (IS_ERR(pdev)) { + cpufreq_pdev = platform_device_register_simple("mtk-cpufreq", -1, NULL, 0); + if (IS_ERR(cpufreq_pdev)) { pr_err("failed to register mtk-cpufreq platform device\n"); platform_driver_unregister(&mtk_cpufreq_platdrv); - return PTR_ERR(pdev); + return PTR_ERR(cpufreq_pdev); } return 0; } -device_initcall(mtk_cpufreq_driver_init); +module_init(mtk_cpufreq_driver_init) + +static void __exit mtk_cpufreq_driver_exit(void) +{ + platform_device_unregister(cpufreq_pdev); + platform_driver_unregister(&mtk_cpufreq_platdrv); +} +module_exit(mtk_cpufreq_driver_exit) MODULE_DESCRIPTION("MediaTek CPUFreq driver"); MODULE_AUTHOR("Pi-Cheng Chen "); diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c index 755bbdfc5b82f..3db4fca1172b4 100644 --- a/drivers/cpuidle/cpuidle-psci-domain.c +++ b/drivers/cpuidle/cpuidle-psci-domain.c @@ -52,7 +52,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi) struct generic_pm_domain *pd; struct psci_pd_provider *pd_provider; struct dev_power_governor *pd_gov; - int ret = -ENOMEM, state_count = 0; + int ret = -ENOMEM; pd = dt_idle_pd_alloc(np, psci_dt_parse_state_node); if (!pd) @@ -71,7 +71,7 @@ static int psci_pd_init(struct device_node *np, bool use_osi) pd->flags |= GENPD_FLAG_ALWAYS_ON; /* Use governor for CPU PM domains if it has some states to manage. */ - pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL; + pd_gov = pd->states ? &pm_domain_cpu_gov : NULL; ret = pm_genpd_init(pd, pd_gov, false); if (ret) diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index b51b5df084500..540105ca0781f 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -131,6 +132,49 @@ static int psci_idle_cpuhp_down(unsigned int cpu) return 0; } +static void psci_idle_syscore_switch(bool suspend) +{ + bool cleared = false; + struct device *dev; + int cpu; + + for_each_possible_cpu(cpu) { + dev = per_cpu_ptr(&psci_cpuidle_data, cpu)->dev; + + if (dev && suspend) { + dev_pm_genpd_suspend(dev); + } else if (dev) { + dev_pm_genpd_resume(dev); + + /* Account for userspace having offlined a CPU. */ + if (pm_runtime_status_suspended(dev)) + pm_runtime_set_active(dev); + + /* Clear domain state to re-start fresh. 
*/ + if (!cleared) { + psci_set_domain_state(0); + cleared = true; + } + } + } +} + +static int psci_idle_syscore_suspend(void) +{ + psci_idle_syscore_switch(true); + return 0; +} + +static void psci_idle_syscore_resume(void) +{ + psci_idle_syscore_switch(false); +} + +static struct syscore_ops psci_idle_syscore_ops = { + .suspend = psci_idle_syscore_suspend, + .resume = psci_idle_syscore_resume, +}; + static void psci_idle_init_cpuhp(void) { int err; @@ -138,6 +182,8 @@ static void psci_idle_init_cpuhp(void) if (!psci_cpuidle_use_cpuhp) return; + register_syscore_ops(&psci_idle_syscore_ops); + err = cpuhp_setup_state_nocalls(CPUHP_AP_CPU_PM_STARTING, "cpuidle/psci:online", psci_idle_cpuhp_up, diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index 5c852e6719924..1151e5e2ba824 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -414,7 +414,7 @@ static int sbi_pd_init(struct device_node *np) struct generic_pm_domain *pd; struct sbi_pd_provider *pd_provider; struct dev_power_governor *pd_gov; - int ret = -ENOMEM, state_count = 0; + int ret = -ENOMEM; pd = dt_idle_pd_alloc(np, sbi_dt_parse_state_node); if (!pd) @@ -433,7 +433,7 @@ static int sbi_pd_init(struct device_node *np) pd->flags |= GENPD_FLAG_ALWAYS_ON; /* Use governor for CPU PM domains if it has some states to manage. */ - pd_gov = state_count > 0 ? &pm_domain_cpu_gov : NULL; + pd_gov = pd->states ? &pm_domain_cpu_gov : NULL; ret = pm_genpd_init(pd, pd_gov, false); if (ret) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 554e400d41cad..70e2e6e373897 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -93,6 +93,68 @@ static int sun8i_ss_cipher_fallback(struct skcipher_request *areq) return err; } +static int sun8i_ss_setup_ivs(struct skcipher_request *areq) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ss_dev *ss = op->ss; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct scatterlist *sg = areq->src; + unsigned int todo, offset; + unsigned int len = areq->cryptlen; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; + int i = 0; + u32 a; + int err; + + rctx->ivlen = ivsize; + if (rctx->op_dir & SS_DECRYPTION) { + offset = areq->cryptlen - ivsize; + scatterwalk_map_and_copy(sf->biv, areq->src, offset, + ivsize, 0); + } + + /* we need to copy all IVs from source in case DMA is bi-directional */ + while (sg && len) { + if (sg_dma_len(sg) == 0) { + sg = sg_next(sg); + continue; + } + if (i == 0) + memcpy(sf->iv[0], areq->iv, ivsize); + a = dma_map_single(ss->dev, sf->iv[i], ivsize, DMA_TO_DEVICE); + if (dma_mapping_error(ss->dev, a)) { + memzero_explicit(sf->iv[i], ivsize); + dev_err(ss->dev, "Cannot DMA MAP IV\n"); + err = -EFAULT; + goto dma_iv_error; + } + rctx->p_iv[i] = a; + /* we need to set up all other IVs only when decrypting */ + if (rctx->op_dir & SS_ENCRYPTION) + return 0; + todo = min(len, sg_dma_len(sg)); + len -= todo; + i++; + if (i < MAX_SG) { + offset = sg->length - ivsize; + scatterwalk_map_and_copy(sf->iv[i], sg, offset, ivsize, 0); + } + rctx->niv = i; + sg = sg_next(sg); + } + + return 0; +dma_iv_error: + i--; + while (i >= 0) { + dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); +
memzero_explicit(sf->iv[i], ivsize); + } + return err; +} + static int sun8i_ss_cipher(struct skcipher_request *areq) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); @@ -101,9 +163,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct sun8i_ss_alg_template *algt; + struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; struct scatterlist *sg; unsigned int todo, len, offset, ivsize; - void *backup_iv = NULL; int nr_sgs = 0; int nr_sgd = 0; int err = 0; @@ -134,30 +196,9 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) ivsize = crypto_skcipher_ivsize(tfm); if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { - rctx->ivlen = ivsize; - rctx->biv = kzalloc(ivsize, GFP_KERNEL | GFP_DMA); - if (!rctx->biv) { - err = -ENOMEM; + err = sun8i_ss_setup_ivs(areq); + if (err) goto theend_key; - } - if (rctx->op_dir & SS_DECRYPTION) { - backup_iv = kzalloc(ivsize, GFP_KERNEL); - if (!backup_iv) { - err = -ENOMEM; - goto theend_key; - } - offset = areq->cryptlen - ivsize; - scatterwalk_map_and_copy(backup_iv, areq->src, offset, - ivsize, 0); - } - memcpy(rctx->biv, areq->iv, ivsize); - rctx->p_iv = dma_map_single(ss->dev, rctx->biv, rctx->ivlen, - DMA_TO_DEVICE); - if (dma_mapping_error(ss->dev, rctx->p_iv)) { - dev_err(ss->dev, "Cannot DMA MAP IV\n"); - err = -ENOMEM; - goto theend_iv; - } } if (areq->src == areq->dst) { nr_sgs = dma_map_sg(ss->dev, areq->src, sg_nents(areq->src), @@ -243,21 +284,19 @@ static int sun8i_ss_cipher(struct skcipher_request *areq) } theend_iv: - if (rctx->p_iv) - dma_unmap_single(ss->dev, rctx->p_iv, rctx->ivlen, - DMA_TO_DEVICE); - if (areq->iv && ivsize > 0) { - if (rctx->biv) { - offset = areq->cryptlen - ivsize; - if (rctx->op_dir & SS_DECRYPTION) { - memcpy(areq->iv, backup_iv, ivsize); - kfree_sensitive(backup_iv); - } else { - scatterwalk_map_and_copy(areq->iv, areq->dst, offset, - ivsize, 0); - } - kfree(rctx->biv); + for (i = 0; i < rctx->niv; i++) { + dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); + memzero_explicit(sf->iv[i], ivsize); + } + + offset = areq->cryptlen - ivsize; + if (rctx->op_dir & SS_DECRYPTION) { + memcpy(areq->iv, sf->biv, ivsize); + memzero_explicit(sf->biv, ivsize); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, offset, + ivsize, 0); } } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 319fe3279a716..6575305786436 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -66,6 +66,7 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx const char *name) { int flow = rctx->flow; + unsigned int ivlen = rctx->ivlen; u32 v = SS_START; int i; @@ -104,15 +105,14 @@ int sun8i_ss_run_task(struct sun8i_ss_dev *ss, struct sun8i_cipher_req_ctx *rctx mutex_lock(&ss->mlock); writel(rctx->p_key, ss->base + SS_KEY_ADR_REG); - if (i == 0) { - if (rctx->p_iv) - writel(rctx->p_iv, ss->base + SS_IV_ADR_REG); - } else { - if (rctx->biv) { - if (rctx->op_dir == SS_ENCRYPTION) - writel(rctx->t_dst[i - 1].addr + rctx->t_dst[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG); + if (ivlen) { + if (rctx->op_dir == SS_ENCRYPTION) { + if (i == 0) + writel(rctx->p_iv[0], ss->base + SS_IV_ADR_REG); else - writel(rctx->t_src[i - 1].addr + rctx->t_src[i - 1].len * 4 - rctx->ivlen, ss->base + SS_IV_ADR_REG); + writel(rctx->t_dst[i 
- 1].addr + rctx->t_dst[i - 1].len * 4 - ivlen, ss->base + SS_IV_ADR_REG); + } else { + writel(rctx->p_iv[i], ss->base + SS_IV_ADR_REG); } } @@ -464,7 +464,7 @@ static void sun8i_ss_free_flows(struct sun8i_ss_dev *ss, int i) */ static int allocate_flows(struct sun8i_ss_dev *ss) { - int i, err; + int i, j, err; ss->flows = devm_kcalloc(ss->dev, MAXFLOW, sizeof(struct sun8i_ss_flow), GFP_KERNEL); @@ -474,6 +474,18 @@ static int allocate_flows(struct sun8i_ss_dev *ss) for (i = 0; i < MAXFLOW; i++) { init_completion(&ss->flows[i].complete); + ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].biv) + goto error_engine; + + for (j = 0; j < MAX_SG; j++) { + ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, + GFP_KERNEL | GFP_DMA); + if (!ss->flows[i].iv[j]) + goto error_engine; + } + ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true); if (!ss->flows[i].engine) { dev_err(ss->dev, "Cannot allocate engine\n"); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index 1a71ed49d2333..ca4f280af35d2 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -380,13 +380,21 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) } len = areq->nbytes; - for_each_sg(areq->src, sg, nr_sgs, i) { + sg = areq->src; + i = 0; + while (len > 0 && sg) { + if (sg_dma_len(sg) == 0) { + sg = sg_next(sg); + continue; + } rctx->t_src[i].addr = sg_dma_address(sg); todo = min(len, sg_dma_len(sg)); rctx->t_src[i].len = todo / 4; len -= todo; rctx->t_dst[i].addr = addr_res; rctx->t_dst[i].len = digestsize / 4; + sg = sg_next(sg); + i++; } if (len > 0) { dev_err(ss->dev, "remaining len %d\n", len); diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h index 28188685b9100..57ada86538550 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss.h @@ -121,11 +121,15 @@ struct sginfo { * @complete: completion for the current task on this flow * @status: set to 1 by interrupt if task is done * @stat_req: number of request done by this flow + * @iv: list of IVs to use for each step + * @biv: buffer which contains the backed-up IV */ struct sun8i_ss_flow { struct crypto_engine *engine; struct completion complete; int status; + u8 *iv[MAX_SG]; + u8 *biv; #ifdef CONFIG_CRYPTO_DEV_SUN8I_SS_DEBUG unsigned long stat_req; #endif @@ -164,28 +168,28 @@ struct sun8i_ss_dev { * @t_src: list of mapped SGs with their size * @t_dst: list of mapped SGs with their size * @p_key: DMA address of the key - * @p_iv: DMA address of the IV + * @p_iv: DMA address of the IVs + * @niv: Number of IVs DMA mapped * @method: current algorithm for this request * @op_mode: op_mode for this request * @op_dir: direction (encrypt vs decrypt) for this request * @flow: the flow to use for this request - * @ivlen: size of biv + * @ivlen: size of IVs * @keylen: keylen for this request - * @biv: buffer which contain the IV * @fallback_req: request struct for invoking the fallback skcipher TFM */ struct sun8i_cipher_req_ctx { struct sginfo t_src[MAX_SG]; struct sginfo t_dst[MAX_SG]; u32 p_key; - u32 p_iv; + u32 p_iv[MAX_SG]; + int niv; u32 method; u32 op_mode; u32 op_dir; int flow; unsigned int ivlen; unsigned int keylen; - void *biv; struct skcipher_request fallback_req; // keep at the end }; diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 
ca0361b2dbb07..f87aa2169e5f5 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -609,6 +609,13 @@ static bool check_version(struct fsl_mc_version *mc_version, u32 major, } #endif +static bool needs_entropy_delay_adjustment(void) +{ + if (of_machine_is_compatible("fsl,imx6sx")) + return true; + return false; +} + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -855,6 +862,8 @@ static int caam_probe(struct platform_device *pdev) * Also, if a handle was instantiated, do not change * the TRNG parameters. */ + if (needs_entropy_delay_adjustment()) + ent_delay = 12000; if (!(ctrlpriv->rng4_sh_init || inst_handles)) { dev_info(dev, "Entropy delay = %u\n", @@ -871,6 +880,15 @@ static int caam_probe(struct platform_device *pdev) */ ret = instantiate_rng(dev, inst_handles, gen_sk); + /* + * Entropy delay is determined via TRNG characterization. + * TRNG characterization is run across different voltages + * and temperatures. + * If worst case value for ent_dly is identified, + * the loop can be skipped for that platform. + */ + if (needs_entropy_delay_adjustment()) + break; if (ret == -EAGAIN) /* * if here, the loop will rerun, diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 6ab93dfd478a9..3aefb177715e9 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -170,6 +171,31 @@ static void *sev_fw_alloc(unsigned long len) return page_address(page); } +static struct file *open_file_as_root(const char *filename, int flags, umode_t mode) +{ + struct file *fp; + struct path root; + struct cred *cred; + const struct cred *old_cred; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + + cred = prepare_creds(); + if (!cred) + return ERR_PTR(-ENOMEM); + cred->fsuid = GLOBAL_ROOT_UID; + old_cred = override_creds(cred); + + fp = file_open_root(&root, filename, flags, mode); + path_put(&root); + + revert_creds(old_cred); + + return fp; +} + static int sev_read_init_ex_file(void) { struct sev_device *sev = psp_master->sev_data; @@ -181,7 +207,7 @@ static int sev_read_init_ex_file(void) if (!sev_init_ex_buffer) return -EOPNOTSUPP; - fp = filp_open(init_ex_path, O_RDONLY, 0); + fp = open_file_as_root(init_ex_path, O_RDONLY, 0); if (IS_ERR(fp)) { int ret = PTR_ERR(fp); @@ -217,7 +243,7 @@ static void sev_write_init_ex_file(void) if (!sev_init_ex_buffer) return; - fp = filp_open(init_ex_path, O_CREAT | O_WRONLY, 0600); + fp = open_file_as_root(init_ex_path, O_CREAT | O_WRONLY, 0600); if (IS_ERR(fp)) { dev_err(sev->dev, "SEV: could not open file for write, error %ld\n", diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 11e0278c8631d..6140e49273226 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -356,12 +356,14 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx, req_ctx->mlli_params.mlli_dma_addr); } - dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); - dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); - if (src != dst) { - dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE); dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst)); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); + } else { + dma_unmap_sg(dev, src, 
req_ctx->in_nents, DMA_BIDIRECTIONAL); + dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); } } @@ -377,6 +379,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, u32 dummy = 0; int rc = 0; u32 mapped_nents = 0; + int src_direction = (src != dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); req_ctx->dma_buf_type = CC_DMA_BUF_DLLI; mlli_params->curr_pool = NULL; @@ -399,7 +402,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } /* Map the src SGL */ - rc = cc_map_sg(dev, src, nbytes, DMA_BIDIRECTIONAL, &req_ctx->in_nents, + rc = cc_map_sg(dev, src, nbytes, src_direction, &req_ctx->in_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) goto cipher_exit; @@ -416,7 +419,7 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, } } else { /* Map the dst sg */ - rc = cc_map_sg(dev, dst, nbytes, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, dst, nbytes, DMA_FROM_DEVICE, &req_ctx->out_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dummy, &mapped_nents); if (rc) @@ -456,6 +459,7 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) struct aead_req_ctx *areq_ctx = aead_request_ctx(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct cc_drvdata *drvdata = dev_get_drvdata(dev); + int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); if (areq_ctx->mac_buf_dma_addr) { dma_unmap_single(dev, areq_ctx->mac_buf_dma_addr, @@ -514,13 +518,11 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); - dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction); if (req->src != req->dst) { dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", sg_virt(req->dst)); - dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, - DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE); } if (drvdata->coherent && areq_ctx->gen_ctx.op_type == DRV_CRYPTO_DIRECTION_DECRYPT && @@ -843,7 +845,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, else size_for_map -= authsize; - rc = cc_map_sg(dev, req->dst, size_for_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->dst, size_for_map, DMA_FROM_DEVICE, &areq_ctx->dst.mapped_nents, LLI_MAX_NUM_OF_DATA_ENTRIES, &dst_last_bytes, &dst_mapped_nents); @@ -1056,7 +1058,8 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) size_to_map += authsize; } - rc = cc_map_sg(dev, req->src, size_to_map, DMA_BIDIRECTIONAL, + rc = cc_map_sg(dev, req->src, size_to_map, + (req->src != req->dst ? 
DMA_TO_DEVICE : DMA_BIDIRECTIONAL), &areq_ctx->src.mapped_nents, (LLI_MAX_NUM_OF_ASSOC_DATA_ENTRIES + LLI_MAX_NUM_OF_DATA_ENTRIES), diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index b739d3b873dcf..c6f2fa753b7c0 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -624,7 +624,6 @@ struct skcipher_alg mv_cesa_ecb_des3_ede_alg = { .decrypt = mv_cesa_ecb_des3_ede_decrypt, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, .base = { .cra_name = "ecb(des3_ede)", .cra_driver_name = "mv-ecb-des3-ede", diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c index 32a036ada5d0a..f418817c0f43e 100644 --- a/drivers/crypto/nx/nx-common-powernv.c +++ b/drivers/crypto/nx/nx-common-powernv.c @@ -827,7 +827,7 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id, goto err_out; vas_init_rx_win_attr(&rxattr, coproc->ct); - rxattr.rx_fifo = (void *)rx_fifo; + rxattr.rx_fifo = rx_fifo; rxattr.rx_fifo_size = fifo_size; rxattr.lnotify_lpid = lpid; rxattr.lnotify_pid = pid; diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index a03c6cf723312..dfa7ee41c5e9c 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -152,9 +152,9 @@ struct adf_pfvf_ops { int (*enable_comms)(struct adf_accel_dev *accel_dev); u32 (*get_pf2vf_offset)(u32 i); u32 (*get_vf2pf_offset)(u32 i); - u32 (*get_vf2pf_sources)(void __iomem *pmisc_addr); void (*enable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask); void (*disable_vf2pf_interrupts)(void __iomem *pmisc_addr, u32 vf_mask); + u32 (*disable_pending_vf2pf_interrupts)(void __iomem *pmisc_addr); int (*send_msg)(struct adf_accel_dev *accel_dev, struct pfvf_message msg, u32 pfvf_offset, struct mutex *csr_lock); struct pfvf_message (*recv_msg)(struct adf_accel_dev *accel_dev, diff --git a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c index 1a9072aac2ca9..def4cc8e1039a 100644 --- a/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen2_pfvf.c @@ -13,6 +13,7 @@ #include "adf_pfvf_utils.h" /* VF2PF interrupts */ +#define ADF_GEN2_VF_MSK 0xFFFF #define ADF_GEN2_ERR_REG_VF2PF(vf_src) (((vf_src) & 0x01FFFE00) >> 9) #define ADF_GEN2_ERR_MSK_VF2PF(vf_mask) (((vf_mask) & 0xFFFF) << 9) @@ -50,23 +51,6 @@ static u32 adf_gen2_vf_get_pfvf_offset(u32 i) return ADF_GEN2_VF_PF2VF_OFFSET; } -static u32 adf_gen2_get_vf2pf_sources(void __iomem *pmisc_addr) -{ - u32 errsou3, errmsk3, vf_int_mask; - - /* Get the interrupt sources triggered by VFs */ - errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3); - vf_int_mask = ADF_GEN2_ERR_REG_VF2PF(errsou3); - - /* To avoid adding duplicate entries to work queue, clear - * vf_int_mask_sets bits that are already masked in ERRMSK register. 
- */ - errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3); - vf_int_mask &= ~ADF_GEN2_ERR_REG_VF2PF(errmsk3); - - return vf_int_mask; -} - static void adf_gen2_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { @@ -89,6 +73,44 @@ static void adf_gen2_disable_vf2pf_interrupts(void __iomem *pmisc_addr, } } +static u32 adf_gen2_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) +{ + u32 sources, disabled, pending; + u32 errsou3, errmsk3; + + /* Get the interrupt sources triggered by VFs */ + errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3); + sources = ADF_GEN2_ERR_REG_VF2PF(errsou3); + + if (!sources) + return 0; + + /* Get the already disabled interrupts */ + errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3); + disabled = ADF_GEN2_ERR_REG_VF2PF(errmsk3); + + pending = sources & ~disabled; + if (!pending) + return 0; + + /* Due to HW limitations, when disabling the interrupts, we can't + * just disable the requested sources, as this would lead to missed + * interrupts if ERRSOU3 changes just before writing to ERRMSK3. + * To work around it, disable all and re-enable only the sources that + * are not in vf_mask and were not already disabled. Re-enabling will + * trigger a new interrupt for the sources that have changed in the + * meantime, if any. + */ + errmsk3 |= ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + + errmsk3 &= ADF_GEN2_ERR_MSK_VF2PF(sources | disabled); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + + /* Return the sources of the (new) interrupt(s) */ + return pending; +} + static u32 gen2_csr_get_int_bit(enum gen2_csr_pos offset) { return ADF_PFVF_INT << offset; @@ -362,9 +384,9 @@ void adf_gen2_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops) pfvf_ops->enable_comms = adf_enable_pf2vf_comms; pfvf_ops->get_pf2vf_offset = adf_gen2_pf_get_pfvf_offset; pfvf_ops->get_vf2pf_offset = adf_gen2_pf_get_pfvf_offset; - pfvf_ops->get_vf2pf_sources = adf_gen2_get_vf2pf_sources; pfvf_ops->enable_vf2pf_interrupts = adf_gen2_enable_vf2pf_interrupts; pfvf_ops->disable_vf2pf_interrupts = adf_gen2_disable_vf2pf_interrupts; + pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen2_disable_pending_vf2pf_interrupts; pfvf_ops->send_msg = adf_gen2_pf2vf_send; pfvf_ops->recv_msg = adf_gen2_vf2pf_recv; } diff --git a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c index d80d493a77568..40fdab857f959 100644 --- a/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/qat/qat_common/adf_gen4_pfvf.c @@ -15,6 +15,7 @@ /* VF2PF interrupt source registers */ #define ADF_4XXX_VM2PF_SOU 0x41A180 #define ADF_4XXX_VM2PF_MSK 0x41A1C0 +#define ADF_GEN4_VF_MSK 0xFFFF #define ADF_PFVF_GEN4_MSGTYPE_SHIFT 2 #define ADF_PFVF_GEN4_MSGTYPE_MASK 0x3F @@ -36,16 +37,6 @@ static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) return ADF_4XXX_VM2PF_OFFSET(i); } -static u32 adf_gen4_get_vf2pf_sources(void __iomem *pmisc_addr) -{ - u32 sou, mask; - - sou = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_SOU); - mask = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); - - return sou & ~mask; -} - static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { @@ -64,6 +55,37 @@ static void adf_gen4_disable_vf2pf_interrupts(void __iomem *pmisc_addr, ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, val); } +static u32 adf_gen4_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) +{ + u32 sources, disabled, pending; + + /* Get the interrupt sources triggered by VFs */ + sources = ADF_CSR_RD(pmisc_addr, 
ADF_4XXX_VM2PF_SOU); + if (!sources) + return 0; + + /* Get the already disabled interrupts */ + disabled = ADF_CSR_RD(pmisc_addr, ADF_4XXX_VM2PF_MSK); + + pending = sources & ~disabled; + if (!pending) + return 0; + + /* Due to HW limitations, when disabling the interrupts, we can't + * just disable the requested sources, as this would lead to missed + * interrupts if VM2PF_SOU changes just before writing to VM2PF_MSK. + * To work around it, disable all and re-enable only the sources that + * are not in vf_mask and were not already disabled. Re-enabling will + * trigger a new interrupt for the sources that have changed in the + * meantime, if any. + */ + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, ADF_GEN4_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_4XXX_VM2PF_MSK, disabled | sources); + + /* Return the sources of the (new) interrupt(s) */ + return pending; +} + static int adf_gen4_pfvf_send(struct adf_accel_dev *accel_dev, struct pfvf_message msg, u32 pfvf_offset, struct mutex *csr_lock) @@ -115,9 +137,9 @@ void adf_gen4_init_pf_pfvf_ops(struct adf_pfvf_ops *pfvf_ops) pfvf_ops->enable_comms = adf_enable_pf2vf_comms; pfvf_ops->get_pf2vf_offset = adf_gen4_pf_get_pf2vf_offset; pfvf_ops->get_vf2pf_offset = adf_gen4_pf_get_vf2pf_offset; - pfvf_ops->get_vf2pf_sources = adf_gen4_get_vf2pf_sources; pfvf_ops->enable_vf2pf_interrupts = adf_gen4_enable_vf2pf_interrupts; pfvf_ops->disable_vf2pf_interrupts = adf_gen4_disable_vf2pf_interrupts; + pfvf_ops->disable_pending_vf2pf_interrupts = adf_gen4_disable_pending_vf2pf_interrupts; pfvf_ops->send_msg = adf_gen4_pfvf_send; pfvf_ops->recv_msg = adf_gen4_pfvf_recv; } diff --git a/drivers/crypto/qat/qat_common/adf_isr.c b/drivers/crypto/qat/qat_common/adf_isr.c index a35149f8bf1ee..23f7fff32c642 100644 --- a/drivers/crypto/qat/qat_common/adf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_isr.c @@ -76,32 +76,29 @@ void adf_disable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask) spin_unlock_irqrestore(&accel_dev->pf.vf2pf_ints_lock, flags); } -static void adf_disable_vf2pf_interrupts_irq(struct adf_accel_dev *accel_dev, - u32 vf_mask) +static u32 adf_disable_pending_vf2pf_interrupts(struct adf_accel_dev *accel_dev) { void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev); + u32 pending; spin_lock(&accel_dev->pf.vf2pf_ints_lock); - GET_PFVF_OPS(accel_dev)->disable_vf2pf_interrupts(pmisc_addr, vf_mask); + pending = GET_PFVF_OPS(accel_dev)->disable_pending_vf2pf_interrupts(pmisc_addr); spin_unlock(&accel_dev->pf.vf2pf_ints_lock); + + return pending; } static bool adf_handle_vf2pf_int(struct adf_accel_dev *accel_dev) { - void __iomem *pmisc_addr = adf_get_pmisc_base(accel_dev); bool irq_handled = false; unsigned long vf_mask; - /* Get the interrupt sources triggered by VFs */ - vf_mask = GET_PFVF_OPS(accel_dev)->get_vf2pf_sources(pmisc_addr); - + /* Get the interrupt sources triggered by VFs, except for those already disabled */ + vf_mask = adf_disable_pending_vf2pf_interrupts(accel_dev); if (vf_mask) { struct adf_accel_vf_info *vf_info; int i; - /* Disable VF2PF interrupts for VFs with pending ints */ - adf_disable_vf2pf_interrupts_irq(accel_dev, vf_mask); - /* * Handle VF2PF interrupt unless the VF is malicious and * is attempting to flood the host OS with VF2PF interrupts. 
diff --git a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c index 588352de1ef0e..d17318d3f63a4 100644 --- a/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/qat/qat_common/adf_pfvf_pf_proto.c @@ -154,7 +154,7 @@ static struct pfvf_message handle_blkmsg_req(struct adf_accel_vf_info *vf_info, if (FIELD_GET(ADF_VF2PF_BLOCK_CRC_REQ_MASK, req.data)) { dev_dbg(&GET_DEV(vf_info->accel_dev), "BlockMsg of type %d for CRC over %d bytes received from VF%d\n", - blk_type, blk_byte, vf_info->vf_nr); + blk_type, blk_byte + 1, vf_info->vf_nr); if (!adf_pf2vf_blkmsg_get_data(vf_info, blk_type, blk_byte, byte_max, &resp_data, diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 09599fe4d2f3f..91095ad479dc3 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -7,6 +7,8 @@ #include "adf_dh895xcc_hw_data.h" #include "icp_qat_hw.h" +#define ADF_DH895XCC_VF_MSK 0xFFFFFFFF + /* Worker thread to service arbiter mappings */ static const u32 thrd_to_arb_map[ADF_DH895XCC_MAX_ACCELENGINES] = { 0x12222AAA, 0x11666666, 0x12222AAA, 0x11666666, @@ -58,17 +60,24 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) capabilities = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | - ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + ICP_ACCEL_CAPABILITIES_AUTHENTICATION | + ICP_ACCEL_CAPABILITIES_CIPHER | + ICP_ACCEL_CAPABILITIES_COMPRESSION; /* Read accelerator capabilities mask */ pci_read_config_dword(pdev, ADF_DEVICE_LEGFUSE_OFFSET, &legfuses); - if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) + /* A set bit in legfuses means the feature is OFF in this SKU */ + if (legfuses & ICP_ACCEL_MASK_CIPHER_SLICE) { capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } if (legfuses & ICP_ACCEL_MASK_PKE_SLICE) capabilities &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; - if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) + if (legfuses & ICP_ACCEL_MASK_AUTH_SLICE) { capabilities &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + capabilities &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } if (legfuses & ICP_ACCEL_MASK_COMPRESS_SLICE) capabilities &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION; @@ -114,29 +123,6 @@ static void adf_enable_ints(struct adf_accel_dev *accel_dev) ADF_DH895XCC_SMIA1_MASK); } -static u32 get_vf2pf_sources(void __iomem *pmisc_bar) -{ - u32 errsou3, errmsk3, errsou5, errmsk5, vf_int_mask; - - /* Get the interrupt sources triggered by VFs */ - errsou3 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRSOU3); - vf_int_mask = ADF_DH895XCC_ERR_REG_VF2PF_L(errsou3); - - /* To avoid adding duplicate entries to work queue, clear - * vf_int_mask_sets bits that are already masked in ERRMSK register. 
- */ - errmsk3 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRMSK3); - vf_int_mask &= ~ADF_DH895XCC_ERR_REG_VF2PF_L(errmsk3); - - /* Do the same for ERRSOU5 */ - errsou5 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRSOU5); - errmsk5 = ADF_CSR_RD(pmisc_bar, ADF_GEN2_ERRMSK5); - vf_int_mask |= ADF_DH895XCC_ERR_REG_VF2PF_U(errsou5); - vf_int_mask &= ~ADF_DH895XCC_ERR_REG_VF2PF_U(errmsk5); - - return vf_int_mask; -} - static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) { /* Enable VF2PF Messaging Ints - VFs 0 through 15 per vf_mask[15:0] */ @@ -150,7 +136,6 @@ static void enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) if (vf_mask >> 16) { u32 val = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5) & ~ADF_DH895XCC_ERR_MSK_VF2PF_U(vf_mask); - ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, val); } } @@ -173,6 +158,54 @@ static void disable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) } } +static u32 disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) +{ + u32 sources, pending, disabled; + u32 errsou3, errmsk3; + u32 errsou5, errmsk5; + + /* Get the interrupt sources triggered by VFs */ + errsou3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU3); + errsou5 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRSOU5); + sources = ADF_DH895XCC_ERR_REG_VF2PF_L(errsou3) + | ADF_DH895XCC_ERR_REG_VF2PF_U(errsou5); + + if (!sources) + return 0; + + /* Get the already disabled interrupts */ + errmsk3 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK3); + errmsk5 = ADF_CSR_RD(pmisc_addr, ADF_GEN2_ERRMSK5); + disabled = ADF_DH895XCC_ERR_REG_VF2PF_L(errmsk3) + | ADF_DH895XCC_ERR_REG_VF2PF_U(errmsk5); + + pending = sources & ~disabled; + if (!pending) + return 0; + + /* Due to HW limitations, when disabling the interrupts, we can't + * just disable the requested sources, as this would lead to missed + * interrupts if sources changes just before writing to ERRMSK3 and + * ERRMSK5. + * To work around it, disable all and re-enable only the sources that + * are not in vf_mask and were not already disabled. Re-enabling will + * trigger a new interrupt for the sources that have changed in the + * meantime, if any. 
+ */ + errmsk3 |= ADF_DH895XCC_ERR_MSK_VF2PF_L(ADF_DH895XCC_VF_MSK); + errmsk5 |= ADF_DH895XCC_ERR_MSK_VF2PF_U(ADF_DH895XCC_VF_MSK); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5); + + errmsk3 &= ADF_DH895XCC_ERR_MSK_VF2PF_L(sources | disabled); + errmsk5 &= ADF_DH895XCC_ERR_MSK_VF2PF_U(sources | disabled); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); + ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5); + + /* Return the sources of the (new) interrupt(s) */ + return pending; +} + static void configure_iov_threads(struct adf_accel_dev *accel_dev, bool enable) { adf_gen2_cfg_iov_thds(accel_dev, enable, @@ -220,9 +253,9 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->disable_iov = adf_disable_sriov; adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); - hw_data->pfvf_ops.get_vf2pf_sources = get_vf2pf_sources; hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts; hw_data->pfvf_ops.disable_vf2pf_interrupts = disable_vf2pf_interrupts; + hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); } diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 49a4b1c47299f..44e899f06094c 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -27,12 +27,8 @@ static int wait_for_media(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_endpoint_dvsec_info *info = &cxlds->info; int rc; - if (!info->mem_enabled) - return -EBUSY; - rc = cxlds->wait_media_ready(cxlds); if (rc) return rc; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 3f2182d668292..bb92853c3b933 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -462,16 +462,24 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds) return 0; } -static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +/* + * Return positive number of non-zero ranges on success and a negative + * error code on failure. The cxl_mem driver depends on ranges == 0 to + * init HDM operation. 
+ */ +static int __cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int hdm_count, rc, i, ranges = 0; + struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; - int hdm_count, rc, i; u16 cap, ctrl; - if (!d) + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); return -ENXIO; + } rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); if (rc) @@ -481,8 +489,10 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; + } /* * It is not allowed by spec for MEM.capable to be set and have 0 legacy @@ -495,8 +505,10 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) return -EINVAL; rc = wait_for_valid(cxlds); - if (rc) + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); return rc; + } info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); @@ -538,10 +550,17 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) }; if (size) - info->ranges++; + ranges++; } - return 0; + return ranges; +} + +static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + + info->ranges = __cxl_dvsec_ranges(cxlds, info); } static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -610,10 +629,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_dvsec_ranges(cxlds); - if (rc) - dev_warn(&pdev->dev, - "Failed to get DVSEC range information (%d)\n", rc); + cxl_dvsec_ranges(cxlds); cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 293857ebfd75d..538e8dc74f40a 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -477,6 +477,8 @@ static int rk3399_dmcfreq_remove(struct platform_device *pdev) { struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(&pdev->dev); + devfreq_event_disable_edev(dmcfreq->edev); + /* * Before remove the opp table we need to unregister the opp notifier. 
*/ diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index b9b2b4a4124ee..033df43db0cec 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -369,10 +369,16 @@ int idxd_cdev_register(void) rc = alloc_chrdev_region(&ictx[i].devt, 0, MINORMASK, ictx[i].name); if (rc) - return rc; + goto err_free_chrdev_region; } return 0; + +err_free_chrdev_region: + for (i--; i >= 0; i--) + unregister_chrdev_region(ictx[i].devt, MINORMASK); + + return rc; } void idxd_cdev_remove(void) diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index bfff59617d047..f9ed550117756 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -87,6 +87,27 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, hw->completion_addr = compl; } +static struct dma_async_tx_descriptor * +idxd_dma_prep_interrupt(struct dma_chan *c, unsigned long flags) +{ + struct idxd_wq *wq = to_idxd_wq(c); + u32 desc_flags; + struct idxd_desc *desc; + + if (wq->state != IDXD_WQ_ENABLED) + return NULL; + + op_flag_setup(flags, &desc_flags); + desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(desc)) + return NULL; + + idxd_prep_desc_common(wq, desc->hw, DSA_OPCODE_NOOP, + 0, 0, 0, desc->compl_dma, desc_flags); + desc->txd.flags = flags; + return &desc->txd; +} + static struct dma_async_tx_descriptor * idxd_dma_submit_memcpy(struct dma_chan *c, dma_addr_t dma_dest, dma_addr_t dma_src, size_t len, unsigned long flags) @@ -193,10 +214,12 @@ int idxd_register_dma_device(struct idxd_device *idxd) INIT_LIST_HEAD(&dma->channels); dma->dev = dev; + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); dma_cap_set(DMA_PRIVATE, dma->cap_mask); dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask); dma->device_release = idxd_dma_release; + dma->device_prep_dma_interrupt = idxd_dma_prep_interrupt; if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) { dma_cap_set(DMA_MEMCPY, dma->cap_mask); dma->device_prep_dma_memcpy = idxd_dma_submit_memcpy; diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 6f57ff0e7b37b..f8c8b9d76aadc 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -34,7 +34,6 @@ #include "virt-dma.h" #define STM32_MDMA_GISR0 0x0000 /* MDMA Int Status Reg 1 */ -#define STM32_MDMA_GISR1 0x0004 /* MDMA Int Status Reg 2 */ /* MDMA Channel x interrupt/status register */ #define STM32_MDMA_CISR(x) (0x40 + 0x40 * (x)) /* x = 0..62 */ @@ -168,7 +167,7 @@ #define STM32_MDMA_MAX_BUF_LEN 128 #define STM32_MDMA_MAX_BLOCK_LEN 65536 -#define STM32_MDMA_MAX_CHANNELS 63 +#define STM32_MDMA_MAX_CHANNELS 32 #define STM32_MDMA_MAX_REQUESTS 256 #define STM32_MDMA_MAX_BURST 128 #define STM32_MDMA_VERY_HIGH_PRIORITY 0x3 @@ -1317,26 +1316,16 @@ static void stm32_mdma_xfer_end(struct stm32_mdma_chan *chan) static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid) { struct stm32_mdma_device *dmadev = devid; - struct stm32_mdma_chan *chan = devid; + struct stm32_mdma_chan *chan; u32 reg, id, ccr, ien, status; /* Find out which channel generates the interrupt */ status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0); - if (status) { - id = __ffs(status); - } else { - status = readl_relaxed(dmadev->base + STM32_MDMA_GISR1); - if (!status) { - dev_dbg(mdma2dev(dmadev), "spurious it\n"); - return IRQ_NONE; - } - id = __ffs(status); - /* - * As GISR0 provides status for channel id from 0 to 31, - * so GISR1 provides status for channel id from 32 to 62 - */ - id += 32; + if (!status) { + dev_dbg(mdma2dev(dmadev), "spurious it\n"); + return IRQ_NONE; } + id = __ffs(status); chan = 
&dmadev->chan[id]; if (!chan) { diff --git a/drivers/dma/ti/k3-psil-am62.c b/drivers/dma/ti/k3-psil-am62.c index d431e20332378..2b6fd6e37c610 100644 --- a/drivers/dma/ti/k3-psil-am62.c +++ b/drivers/dma/ti/k3-psil-am62.c @@ -70,10 +70,10 @@ /* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */ static struct psil_ep am62_src_ep_map[] = { /* SAUL */ - PSIL_SAUL(0x7500, 20, 35, 8, 35, 0), - PSIL_SAUL(0x7501, 21, 35, 8, 36, 0), - PSIL_SAUL(0x7502, 22, 43, 8, 43, 0), - PSIL_SAUL(0x7503, 23, 43, 8, 44, 0), + PSIL_SAUL(0x7504, 20, 35, 8, 35, 0), + PSIL_SAUL(0x7505, 21, 35, 8, 36, 0), + PSIL_SAUL(0x7506, 22, 43, 8, 43, 0), + PSIL_SAUL(0x7507, 23, 43, 8, 44, 0), /* PDMA_MAIN0 - SPI0-3 */ PSIL_PDMA_XY_PKT(0x4302), PSIL_PDMA_XY_PKT(0x4303), diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index 7aa63b6520272..3ffa7f37c7017 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -229,7 +229,7 @@ struct zynqmp_dma_chan { bool is_dmacoherent; struct tasklet_struct tasklet; bool idle; - u32 desc_size; + size_t desc_size; bool err; u32 bus_width; u32 src_burst_len; @@ -486,7 +486,8 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) } chan->desc_pool_v = dma_alloc_coherent(chan->dev, - (2 * chan->desc_size * ZYNQMP_DMA_NUM_DESCS), + (2 * ZYNQMP_DMA_DESC_SIZE(chan) * + ZYNQMP_DMA_NUM_DESCS), &chan->desc_pool_p, GFP_KERNEL); if (!chan->desc_pool_v) return -ENOMEM; diff --git a/drivers/edac/dmc520_edac.c b/drivers/edac/dmc520_edac.c index b8a7d9594afd4..1fa5ca57e9ec1 100644 --- a/drivers/edac/dmc520_edac.c +++ b/drivers/edac/dmc520_edac.c @@ -489,7 +489,7 @@ static int dmc520_edac_probe(struct platform_device *pdev) dev = &pdev->dev; for (idx = 0; idx < NUMBER_OF_IRQS; idx++) { - irq = platform_get_irq_byname(pdev, dmc520_irq_configs[idx].name); + irq = platform_get_irq_byname_optional(pdev, dmc520_irq_configs[idx].name); irqs[idx] = irq; masks[idx] = dmc520_irq_configs[idx].mask; if (irq >= 0) { diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index 7c6d5857ff25e..180be768c2157 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -394,8 +394,8 @@ static int axp288_extcon_probe(struct platform_device *pdev) if (adev) { info->id_extcon = extcon_get_extcon_dev(acpi_dev_name(adev)); put_device(&adev->dev); - if (!info->id_extcon) - return -EPROBE_DEFER; + if (IS_ERR(info->id_extcon)) + return PTR_ERR(info->id_extcon); dev_info(dev, "controlling USB role\n"); } else { diff --git a/drivers/extcon/extcon-ptn5150.c b/drivers/extcon/extcon-ptn5150.c index 5b9a3cf8df268..2a7874108df87 100644 --- a/drivers/extcon/extcon-ptn5150.c +++ b/drivers/extcon/extcon-ptn5150.c @@ -194,6 +194,13 @@ static int ptn5150_init_dev_type(struct ptn5150_info *info) return 0; } +static void ptn5150_work_sync_and_put(void *data) +{ + struct ptn5150_info *info = data; + + cancel_work_sync(&info->irq_work); +} + static int ptn5150_i2c_probe(struct i2c_client *i2c) { struct device *dev = &i2c->dev; @@ -284,6 +291,10 @@ static int ptn5150_i2c_probe(struct i2c_client *i2c) if (ret) return -EINVAL; + ret = devm_add_action_or_reset(dev, ptn5150_work_sync_and_put, info); + if (ret) + return ret; + /* * Update current extcon state if for example OTG connection was there * before the probe diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index a09e704fd0fa1..97e35c32bfa55 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -851,6 +851,8 @@ 
EXPORT_SYMBOL_GPL(extcon_set_property_capability); * @extcon_name: the extcon name provided with extcon_dev_register() * * Return the pointer of extcon device if success or ERR_PTR(err) if fail. + * NOTE: This function returns -EPROBE_DEFER so it may only be called from + * probe() functions. */ struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) { @@ -864,7 +866,7 @@ struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name) if (!strcmp(sd->name, extcon_name)) goto out; } - sd = NULL; + sd = ERR_PTR(-EPROBE_DEFER); out: mutex_unlock(&extcon_dev_list_lock); return sd; @@ -1218,19 +1220,14 @@ int extcon_dev_register(struct extcon_dev *edev) edev->dev.type = &edev->extcon_dev_type; } - ret = device_register(&edev->dev); - if (ret) { - put_device(&edev->dev); - goto err_dev; - } - spin_lock_init(&edev->lock); - edev->nh = devm_kcalloc(&edev->dev, edev->max_supported, - sizeof(*edev->nh), GFP_KERNEL); - if (!edev->nh) { - ret = -ENOMEM; - device_unregister(&edev->dev); - goto err_dev; + if (edev->max_supported) { + edev->nh = kcalloc(edev->max_supported, sizeof(*edev->nh), + GFP_KERNEL); + if (!edev->nh) { + ret = -ENOMEM; + goto err_alloc_nh; + } } for (index = 0; index < edev->max_supported; index++) @@ -1241,6 +1238,12 @@ int extcon_dev_register(struct extcon_dev *edev) dev_set_drvdata(&edev->dev, edev); edev->state = 0; + ret = device_register(&edev->dev); + if (ret) { + put_device(&edev->dev); + goto err_dev; + } + mutex_lock(&extcon_dev_list_lock); list_add(&edev->entry, &extcon_dev_list); mutex_unlock(&extcon_dev_list_lock); @@ -1248,6 +1251,9 @@ int extcon_dev_register(struct extcon_dev *edev) return 0; err_dev: + if (edev->max_supported) + kfree(edev->nh); +err_alloc_nh: if (edev->max_supported) kfree(edev->extcon_dev_type.groups); err_alloc_groups: @@ -1308,6 +1314,7 @@ void extcon_dev_unregister(struct extcon_dev *edev) if (edev->max_supported) { kfree(edev->extcon_dev_type.groups); kfree(edev->cables); + kfree(edev->nh); } put_device(&edev->dev); diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 14f900047ac0c..44300dbcc643d 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -582,7 +582,7 @@ static int ffa_partition_info_get(const char *uuid_str, return -ENODEV; } - count = ffa_partition_probe(&uuid_null, &pbuf); + count = ffa_partition_probe(&uuid, &pbuf); if (count <= 0) return -ENOENT; @@ -688,8 +688,6 @@ static void ffa_setup_partitions(void) __func__, tpbuf->id); continue; } - - ffa_dev_set_drvdata(ffa_dev, drv_info); } kfree(pbuf); } diff --git a/drivers/firmware/arm_scmi/base.c b/drivers/firmware/arm_scmi/base.c index f5219334fd3a5..3fe172c03c247 100644 --- a/drivers/firmware/arm_scmi/base.c +++ b/drivers/firmware/arm_scmi/base.c @@ -197,7 +197,7 @@ scmi_base_implementation_list_get(const struct scmi_protocol_handle *ph, break; loop_num_ret = le32_to_cpu(*num_ret); - if (tot_num_ret + loop_num_ret > MAX_PROTOCOLS_IMP) { + if (loop_num_ret > MAX_PROTOCOLS_IMP - tot_num_ret) { dev_err(dev, "No. 
of Protocol > MAX_PROTOCOLS_IMP"); break; } diff --git a/drivers/firmware/dmi-sysfs.c b/drivers/firmware/dmi-sysfs.c index 3a353776bd344..66727ad3361b9 100644 --- a/drivers/firmware/dmi-sysfs.c +++ b/drivers/firmware/dmi-sysfs.c @@ -604,7 +604,7 @@ static void __init dmi_sysfs_register_handle(const struct dmi_header *dh, "%d-%d", dh->type, entry->instance); if (*ret) { - kfree(entry); + kobject_put(&entry->kobj); return; } diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 2c3dac5ecb36d..243882f5e5f99 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -284,3 +284,18 @@ config EFI_CUSTOM_SSDT_OVERLAYS See Documentation/admin-guide/acpi/ssdt-overlays.rst for more information. + +config EFI_DISABLE_RUNTIME + bool "Disable EFI runtime services support by default" + default y if PREEMPT_RT + help + Allow disabling the EFI runtime services support by default. This can + already be achieved by using the efi=noruntime option, but it could be + useful to have this default without any kernel command line parameter. + + The EFI runtime services are disabled by default when PREEMPT_RT is + enabled, because measurements have shown that some EFI function calls + might take too much time to complete, causing large latencies, which is + an issue for Real-Time kernels. + + This default can be overridden by using the efi=runtime option. diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5502e176d51be..ff57db8f8d059 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -66,7 +66,7 @@ struct mm_struct efi_mm = { struct workqueue_struct *efi_rts_wq; -static bool disable_runtime = IS_ENABLED(CONFIG_PREEMPT_RT); +static bool disable_runtime = IS_ENABLED(CONFIG_EFI_DISABLE_RUNTIME); static int __init setup_noefi(char *arg) { disable_runtime = true; diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 8177a0fae11d8..14663f6713235 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -948,17 +948,17 @@ EXPORT_SYMBOL_GPL(stratix10_svc_allocate_memory); void stratix10_svc_free_memory(struct stratix10_svc_chan *chan, void *kaddr) { struct stratix10_svc_data_mem *pmem; - size_t size = 0; list_for_each_entry(pmem, &svc_data_mem, node) if (pmem->vaddr == kaddr) { - size = pmem->size; - break; + gen_pool_free(chan->ctrl->genpool, + (unsigned long)kaddr, pmem->size); + pmem->vaddr = NULL; + list_del(&pmem->node); + return; } - gen_pool_free(chan->ctrl->genpool, (unsigned long)kaddr, size); - pmem->vaddr = NULL; - list_del(&pmem->node); + list_del(&svc_data_mem); } EXPORT_SYMBOL_GPL(stratix10_svc_free_memory); diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 8726921a11294..33683295a0bfe 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1108,20 +1108,21 @@ static int pca953x_regcache_sync(struct device *dev) { struct pca953x_chip *chip = dev_get_drvdata(dev); int ret; + u8 regaddr; /* * The ordering between direction and output is important, * sync these registers first and only then sync the rest.
*/ - ret = regcache_sync_region(chip->regmap, chip->regs->direction, - chip->regs->direction + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); if (ret) { dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret); return ret; } - ret = regcache_sync_region(chip->regmap, chip->regs->output, - chip->regs->output + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0); + ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip)); if (ret) { dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret); return ret; @@ -1129,16 +1130,18 @@ static int pca953x_regcache_sync(struct device *dev) #ifdef CONFIG_GPIO_PCA953X_IRQ if (chip->driver_data & PCA_PCAL) { - ret = regcache_sync_region(chip->regmap, PCAL953X_IN_LATCH, - PCAL953X_IN_LATCH + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip)); if (ret) { dev_err(dev, "Failed to sync INT latch registers: %d\n", ret); return ret; } - ret = regcache_sync_region(chip->regmap, PCAL953X_INT_MASK, - PCAL953X_INT_MASK + NBANK(chip)); + regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0); + ret = regcache_sync_region(chip->regmap, regaddr, + regaddr + NBANK(chip)); if (ret) { dev_err(dev, "Failed to sync INT mask registers: %d\n", ret); diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 099e358d24915..bcf5214e35866 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "../pinctrl/core.h" @@ -706,7 +707,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev) struct device_node *pctlnp = of_get_parent(np); struct pinctrl_dev *pctldev = NULL; struct rockchip_pin_bank *bank = NULL; - struct rockchip_pin_output_deferred *cfg; + struct rockchip_pin_deferred *cfg; static int gpio; int id, ret; @@ -747,15 +748,22 @@ static int rockchip_gpio_probe(struct platform_device *pdev) return ret; } - while (!list_empty(&bank->deferred_output)) { - cfg = list_first_entry(&bank->deferred_output, - struct rockchip_pin_output_deferred, head); + while (!list_empty(&bank->deferred_pins)) { + cfg = list_first_entry(&bank->deferred_pins, + struct rockchip_pin_deferred, head); list_del(&cfg->head); - ret = rockchip_gpio_direction_output(&bank->gpio_chip, cfg->pin, cfg->arg); - if (ret) - dev_warn(dev, "setting output pin %u to %u failed\n", cfg->pin, cfg->arg); - + switch (cfg->param) { + case PIN_CONFIG_OUTPUT: + ret = rockchip_gpio_direction_output(&bank->gpio_chip, cfg->pin, cfg->arg); + if (ret) + dev_warn(dev, "setting output pin %u to %u failed\n", cfg->pin, + cfg->arg); + break; + default: + dev_warn(dev, "unknown deferred config param %d\n", cfg->param); + break; + } kfree(cfg); } diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 41c31b10ae848..98109839102fb 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -314,8 +314,8 @@ static int gpio_sim_setup_sysfs(struct gpio_sim_chip *chip) for (i = 0; i < num_lines; i++) { attr_group = devm_kzalloc(dev, sizeof(*attr_group), GFP_KERNEL); - attrs = devm_kcalloc(dev, sizeof(*attrs), - GPIO_SIM_NUM_ATTRS, GFP_KERNEL); + attrs = devm_kcalloc(dev, GPIO_SIM_NUM_ATTRS, sizeof(*attrs), + GFP_KERNEL); val_attr = devm_kzalloc(dev, sizeof(*val_attr), GFP_KERNEL); pull_attr = devm_kzalloc(dev, sizeof(*pull_attr), GFP_KERNEL); if 
(!attr_group || !attrs || !val_attr || !pull_attr) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 7e5e51d49d09e..6dec81b1f24be 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -931,6 +931,11 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) if (!np) return 0; + if (!of_property_read_bool(np, "gpio-ranges") && + chip->of_gpio_ranges_fallback) { + return chip->of_gpio_ranges_fallback(chip, np); + } + group_names = of_find_property(np, group_names_propname, NULL); for (;; index++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d0d0ea565e3df..2019622191b59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs int ret; if (cs->in.num_chunks == 0) - return 0; + return -EINVAL; chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL); if (!chunk_array) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 46ef57b07c151..7e6f8475819dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1931,6 +1931,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7421, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7422, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x7423, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, + {0x1002, 0x7424, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, {0x1002, 0x743F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, { PCI_DEVICE(0x1002, PCI_ANY_ID), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a66a0881a934b..88b852b3a2cb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -25,6 +25,9 @@ */ #include +#ifdef CONFIG_X86 +#include +#endif #include "amdgpu.h" #include "amdgpu_gmc.h" @@ -647,12 +650,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) case CHIP_VEGA10: adev->mman.keep_stolen_vga_memory = true; /* - * VEGA10 SRIOV VF needs some firmware reserved area. + * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area. */ - if (amdgpu_sriov_vf(adev)) { - adev->mman.stolen_reserved_offset = 0x100000; - adev->mman.stolen_reserved_size = 0x600000; +#ifdef CONFIG_X86 + if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) { + adev->mman.stolen_reserved_offset = 0x500000; + adev->mman.stolen_reserved_size = 0x200000; } +#endif break; case CHIP_RAVEN: case CHIP_RENOIR: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a6acec1a6155d..21aa556a6befa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -357,7 +357,39 @@ static int psp_sw_init(void *handle) } } + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + amdgpu_sriov_vf(adev) ? 
+ AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + return ret; + + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); + if (ret) + goto failed1; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + if (ret) + goto failed2; + return 0; + +failed2: + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); +failed1: + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + return ret; } static int psp_sw_fini(void *handle) @@ -391,6 +423,13 @@ static int psp_sw_fini(void *handle) kfree(cmd); cmd = NULL; + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + return 0; } @@ -2430,51 +2469,18 @@ static int psp_load_fw(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) { - psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */ - goto skip_memalloc; - } - - if (amdgpu_sriov_vf(adev)) { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); + /* should not destroy ring, only stop */ + psp_ring_stop(psp, PSP_RING_TYPE__KM); } else { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - } - - if (ret) - goto failed; - - ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fence_buf_bo, - &psp->fence_buf_mc_addr, - &psp->fence_buf); - if (ret) - goto failed; - - ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); - if (ret) - goto failed; + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); - memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); - - ret = psp_ring_init(psp, PSP_RING_TYPE__KM); - if (ret) { - DRM_ERROR("PSP ring init failed!\n"); - goto failed; + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); + if (ret) { + DRM_ERROR("PSP ring init failed!\n"); + goto failed; + } } -skip_memalloc: ret = psp_hw_start(psp); if (ret) goto failed; @@ -2592,13 +2598,6 @@ static int psp_hw_fini(void *handle) psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); - amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index ca33505026189..aebafbc327fb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -714,8 +714,7 @@ int amdgpu_ucode_create_bo(struct amdgpu_device *adev) void amdgpu_ucode_free_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - 
amdgpu_bo_free_kernel(&adev->firmware.fw_buf, + amdgpu_bo_free_kernel(&adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 5e3756643da3f..1d55b2bae37e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -864,11 +864,11 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v uint32_t timeout = 50000; uint32_t i, tmp; uint32_t ret = 0; - static void *scratch_reg0; - static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; - static void *spare_int; + void *scratch_reg0; + void *scratch_reg1; + void *scratch_reg2; + void *scratch_reg3; + void *spare_int; if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 299de1d131d82..5ac1a43a16b4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, { unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT)); + amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, (vmid | (vmid << 4))); @@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { 8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */ 18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */ 8 + 16, - .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */ + .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */ .emit_ib = jpeg_v2_0_dec_ring_emit_ib, .emit_fence = jpeg_v2_0_dec_ring_emit_fence, .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 1a03baa597557..654e43e83e2c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -41,6 +41,7 @@ #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index e19f14c3ef59f..8af685fd85318 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -170,6 +170,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs yc_video_codecs_decode = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d7e8f72323641..ff86c43b63d11 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - 
"upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index a8d49c005f73d..627eb1f147c20 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr) << 2); + lower_32_bits(ring->wptr << 2)); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr) << 2); + upper_32_bits(ring->wptr << 2)); } doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 824eace698842..a5eb82bfeaa8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index cb5f0a12333f3..57a34e775da38 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -1821,23 +1821,21 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, - struct amdgpu_job *job) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&job->base.entity->fence_seq)) + if (atomic_read(&p->entity->fence_seq)) return -EINVAL; scheds = 
p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(job->base.entity, scheds, 1); + drm_sched_entity_modify_sched(p->entity, scheds, 1); return 0; } -static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, - uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1908,7 +1906,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p, job); + r = vcn_v3_0_limit_sched(p); if (r) goto out; } @@ -1922,7 +1920,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1941,8 +1939,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, job, - ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 607f65ab39ac8..10cc834a5ac31 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -944,8 +944,6 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, bool kfd_dev_is_large_bar(struct kfd_dev *dev) { - struct kfd_local_mem_info mem_info; - if (debug_largebar) { pr_debug("Simulate large-bar allocation on non large-bar machine\n"); return true; @@ -954,9 +952,8 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) if (dev->use_iommu_v2) return false; - amdgpu_amdkfd_get_local_mem_info(dev->adev, &mem_info); - if (mem_info.local_mem_size_private == 0 && - mem_info.local_mem_size_public > 0) + if (dev->local_mem_info.local_mem_size_private == 0 && + dev->local_mem_info.local_mem_size_public > 0) return true; return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1eaabd2cb41b0..59b349a4c04a3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -2152,7 +2152,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. 
*/ - amdgpu_amdkfd_get_local_mem_info(kdev->adev, &local_mem_info); + local_mem_info = kdev->local_mem_info; sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 62aa6c9d5123d..651498bfecc8d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -156,7 +156,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd, if (gc_version < IP_VERSION(11, 0, 0)) { /* Navi2x+, Navi1x+ */ - if (gc_version >= IP_VERSION(10, 3, 0)) + if (gc_version == IP_VERSION(10, 3, 6)) + kfd->device_info.no_atomic_fw_version = 14; + else if (gc_version >= IP_VERSION(10, 3, 0)) kfd->device_info.no_atomic_fw_version = 92; else if (gc_version >= IP_VERSION(10, 1, 1)) kfd->device_info.no_atomic_fw_version = 145; @@ -575,6 +577,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, if (kfd_resume(kfd)) goto kfd_resume_error; + amdgpu_amdkfd_get_local_mem_info(kfd->adev, &kfd->local_mem_info); + if (kfd_topology_add_device(kfd)) { dev_err(kfd_device, "Error adding device to topology\n"); goto kfd_topology_add_device_error; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 8f58fc491b289..49a29a60b71e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -272,6 +272,7 @@ struct kfd_dev { struct kgd2kfd_shared_resources shared_resources; struct kfd_vmid_info vm_info; + struct kfd_local_mem_info local_mem_info; const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3bdcae239bc0f..9fc24f6823dfc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1102,15 +1102,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) uint32_t buf[7]; uint64_t local_mem_size; int i; - struct kfd_local_mem_info local_mem_info; if (!gpu) return 0; - amdgpu_amdkfd_get_local_mem_info(gpu->adev, &local_mem_info); - - local_mem_size = local_mem_info.local_mem_size_private + - local_mem_info.local_mem_size_public; + local_mem_size = gpu->local_mem_info.local_mem_size_private + + gpu->local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; buf[1] = gpu->pdev->subsystem_vendor | diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 62139ff35476c..8dd03de7c2775 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -771,7 +771,7 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) do { dc_stat_get_dmub_notification(adev->dm.dc, ¬ify); - if (notify.type > ARRAY_SIZE(dm->dmub_thread_offload)) { + if (notify.type >= ARRAY_SIZE(dm->dmub_thread_offload)) { DRM_ERROR("DM: notify type %d invalid!", notify.type); continue; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index 8be4c19706285..2918ad07d489d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -41,9 +41,7 @@ #include "dc_dmub_srv.h" -#if defined (CONFIG_DRM_AMD_DC_DP2_0) #include "dc_link_dp.h" -#endif #define TO_CLK_MGR_DCN315(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn315, base) @@ -91,7 +89,8 @@ static void 
dcn315_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || + dc_is_virtual_signal(pipe->stream->signal))) { if (disable) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); else diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 3121dd2d2a911..fc3af81ed6c62 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -122,7 +122,8 @@ static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable) if (pipe->top_pipe || pipe->prev_odm_pipe) continue; - if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal))) { + if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL || + dc_is_virtual_signal(pipe->stream->signal))) { if (disable) pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg); else diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index cc5128e67daff..8e9a7409c17a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1105,9 +1105,12 @@ static bool get_pixel_clk_frequency_100hz( * not be programmed equal to DPREFCLK */ modulo_hz = REG_READ(MODULO[inst]); - *pixel_clk_khz = div_u64((uint64_t)clock_hz* - clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, - modulo_hz); + if (modulo_hz) + *pixel_clk_khz = div_u64((uint64_t)clock_hz* + clock_source->ctx->dc->clk_mgr->dprefclk_khz*10, + modulo_hz); + else + *pixel_clk_khz = 0; } else { /* NOTE: There is agreement with VBIOS here that MODULO is * programmed equal to DPREFCLK, in which case PHASE will be diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 63934ecf6be84..d71e625cc476e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1030,6 +1030,7 @@ static const struct dc_debug_options debug_defaults_drv = { .afmt = true, } }, + .disable_z10 = true, .optimize_edp_link_rate = true, .enable_sw_cntl_psr = true, .apply_vendor_specific_lttpr_wa = true, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c index 789f7562cdc75..d2273674e8729 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml_wrapper.c @@ -1284,10 +1284,8 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) for (i = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; -#if defined (CONFIG_DRM_AMD_DC_DP2_0) if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i])) return true; -#endif } return false; } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 72e7b5d40af69..5472f9936febc 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -790,7 +790,7 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; - if 
(!pp_funcs->force_performance_level) + if (!pp_funcs || !pp_funcs->force_performance_level) return 0; if (adev->pm.dpm.thermal_active) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 8b23cc9f098ad..8fd0782a2b206 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -1623,19 +1623,7 @@ static int kv_update_samu_dpm(struct amdgpu_device *adev, bool gate) static u8 kv_get_acp_boot_level(struct amdgpu_device *adev) { - u8 i; - struct amdgpu_clock_voltage_dependency_table *table = - &adev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table; - - for (i = 0; i < table->count; i++) { - if (table->entries[i].clk >= 0) /* XXX */ - break; - } - - if (i >= table->count) - i = table->count - 1; - - return i; + return 0; } static void kv_update_acp_boot_level(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index 633dab14f51c2..49c398ec0aaf6 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7297,17 +7297,15 @@ static int si_parse_power_table(struct amdgpu_device *adev) if (!adev->pm.dpm.ps) return -ENOMEM; power_state_offset = (u8 *)state_array->states; - for (i = 0; i < state_array->ucNumEntries; i++) { + for (adev->pm.dpm.num_ps = 0, i = 0; i < state_array->ucNumEntries; i++) { u8 *idx; power_state = (union pplib_power_state *)power_state_offset; non_clock_array_index = power_state->v2.nonClockInfoIndex; non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *) &non_clock_info_array->nonClockInfo[non_clock_array_index]; ps = kzalloc(sizeof(struct si_ps), GFP_KERNEL); - if (ps == NULL) { - kfree(adev->pm.dpm.ps); + if (ps == NULL) return -ENOMEM; - } adev->pm.dpm.ps[i].ps_priv = ps; si_parse_pplib_non_clock_info(adev, &adev->pm.dpm.ps[i], non_clock_info, @@ -7329,8 +7327,8 @@ static int si_parse_power_table(struct amdgpu_device *adev) k++; } power_state_offset += 2 + power_state->v2.ucNumDPMLevels; + adev->pm.dpm.num_ps++; } - adev->pm.dpm.num_ps = state_array->ucNumEntries; /* fill in the vce power states */ for (i = 0; i < adev->pm.dpm.num_of_vce_states; i++) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index f10a0256413e6..32cff21f261ca 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -576,6 +576,8 @@ static int smu_early_init(void *handle) smu->smu_baco.platform_support = false; smu->user_dpm_profile.fan_mode = -1; + mutex_init(&smu->message_lock); + adev->powerplay.pp_handle = smu; adev->powerplay.pp_funcs = &swsmu_pm_funcs; @@ -975,8 +977,6 @@ static int smu_sw_init(void *handle) bitmap_zero(smu->smu_feature.supported, SMU_FEATURE_MAX); bitmap_zero(smu->smu_feature.allowed, SMU_FEATURE_MAX); - mutex_init(&smu->message_lock); - INIT_WORK(&smu->throttling_logging_work, smu_throttling_logging_work_fn); INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn); atomic64_set(&smu->throttle_int_counter, 0); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 38f04836c82f2..7a1e225fb8230 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -586,12 +586,28 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, uint16_t average_gfx_activity; int ret = 0; - if 
((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) - use_metrics_v3 = true; - else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) - use_metrics_v2 = true; + switch (smu->adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + if (smu->smc_fw_version >= 0x3A4900) + use_metrics_v3 = true; + else if (smu->smc_fw_version >= 0x3A4300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 11): + if (smu->smc_fw_version >= 0x412D00) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 12): + if (smu->smc_fw_version >= 0x3B2300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 13): + if (smu->smc_fw_version >= 0x491100) + use_metrics_v2 = true; + break; + default: + break; + } ret = smu_cmn_get_metrics_table(smu, NULL, @@ -3701,13 +3717,28 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, uint16_t average_gfx_activity; int ret = 0; - if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4900)) - use_metrics_v3 = true; - else if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) && - (smu->smc_fw_version >= 0x3A4300)) - use_metrics_v2 = true; - + switch (smu->adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(11, 0, 7): + if (smu->smc_fw_version >= 0x3A4900) + use_metrics_v3 = true; + else if (smu->smc_fw_version >= 0x3A4300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 11): + if (smu->smc_fw_version >= 0x412D00) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 12): + if (smu->smc_fw_version >= 0x3B2300) + use_metrics_v2 = true; + break; + case IP_VERSION(11, 0, 13): + if (smu->smc_fw_version >= 0x491100) + use_metrics_v2 = true; + break; + default: + break; + } ret = smu_cmn_get_metrics_table(smu, &metrics_external, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index b87f550af26ba..5f8809f6990dd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -781,7 +781,7 @@ int smu_v11_0_set_allowed_mask(struct smu_context *smu) goto failed; } - bitmap_copy((unsigned long *)feature_mask, feature->allowed, 64); + bitmap_to_arr32(feature_mask, feature->allowed, 64); ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetAllowedFeaturesMaskHigh, feature_mask[1], NULL); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index fd6c44ece1688..012e3bd99cc23 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1119,6 +1119,39 @@ static int renoir_get_power_profile_mode(struct smu_context *smu, return size; } +static void renoir_get_ss_power_percent(SmuMetrics_t *metrics, + uint32_t *apu_percent, uint32_t *dgpu_percent) +{ + uint32_t apu_boost = 0; + uint32_t dgpu_boost = 0; + uint16_t apu_limit = 0; + uint16_t dgpu_limit = 0; + uint16_t apu_power = 0; + uint16_t dgpu_power = 0; + + apu_power = metrics->ApuPower; + apu_limit = metrics->StapmOriginalLimit; + if (apu_power > apu_limit && apu_limit != 0) + apu_boost = ((apu_power - apu_limit) * 100) / apu_limit; + apu_boost = (apu_boost > 100) ? 
100 : apu_boost; + + dgpu_power = metrics->dGpuPower; + if (metrics->StapmCurrentLimit > metrics->StapmOriginalLimit) + dgpu_limit = metrics->StapmCurrentLimit - metrics->StapmOriginalLimit; + if (dgpu_power > dgpu_limit && dgpu_limit != 0) + dgpu_boost = ((dgpu_power - dgpu_limit) * 100) / dgpu_limit; + dgpu_boost = (dgpu_boost > 100) ? 100 : dgpu_boost; + + if (dgpu_boost >= apu_boost) + apu_boost = 0; + else + dgpu_boost = 0; + + *apu_percent = apu_boost; + *dgpu_percent = dgpu_boost; +} + + static int renoir_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) @@ -1127,6 +1160,9 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu, SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; int ret = 0; + uint32_t apu_percent = 0; + uint32_t dgpu_percent = 0; + ret = smu_cmn_get_metrics_table(smu, NULL, @@ -1171,26 +1207,18 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu, *value = metrics->Voltage[1]; break; case METRICS_SS_APU_SHARE: - /* return the percentage of APU power with respect to APU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of APU power boost + * with respect to APU's power limit. */ - if (metrics->StapmOriginalLimit > 0) - *value = (metrics->ApuPower * 100) / metrics->StapmOriginalLimit; - else - *value = 0; + renoir_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = apu_percent; break; case METRICS_SS_DGPU_SHARE: - /* return the percentage of dGPU power with respect to dGPU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of dGPU power boost + * with respect to dGPU's power limit. 
*/ - if ((metrics->dGpuPower > 0) && - (metrics->StapmCurrentLimit > metrics->StapmOriginalLimit)) - *value = (metrics->dGpuPower * 100) / - (metrics->StapmCurrentLimit - metrics->StapmOriginalLimit); - else - *value = 0; + renoir_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = dgpu_percent; break; default: *value = UINT_MAX; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index cd81f848d45ab..7f998f24af81d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1664,6 +1664,7 @@ static const struct throttling_logging_label { uint32_t feature_mask; const char *label; } logging_label[] = { + {(1U << THROTTLER_TEMP_GPU_BIT), "GPU"}, {(1U << THROTTLER_TEMP_MEM_BIT), "HBM"}, {(1U << THROTTLER_TEMP_VR_GFX_BIT), "VR of GFX rail"}, {(1U << THROTTLER_TEMP_VR_MEM_BIT), "VR of HBM rail"}, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cf09e30bdfe0b..747430ce63942 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -730,7 +730,7 @@ int smu_v13_0_set_allowed_mask(struct smu_context *smu) feature->feature_num < 64) return -EINVAL; - bitmap_copy((unsigned long *)feature_mask, feature->allowed, 64); + bitmap_to_arr32(feature_mask, feature->allowed, 64); ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetAllowedFeaturesMaskHigh, feature_mask[1], NULL); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c index e2d099409123a..feff4f8c927cc 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c @@ -190,6 +190,9 @@ static int yellow_carp_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } @@ -276,6 +279,42 @@ static int yellow_carp_mode2_reset(struct smu_context *smu) return yellow_carp_mode_reset(smu, SMU_RESET_MODE_2); } + +static void yellow_carp_get_ss_power_percent(SmuMetrics_t *metrics, + uint32_t *apu_percent, uint32_t *dgpu_percent) +{ + uint32_t apu_boost = 0; + uint32_t dgpu_boost = 0; + uint16_t apu_limit = 0; + uint16_t dgpu_limit = 0; + uint16_t apu_power = 0; + uint16_t dgpu_power = 0; + + /* APU and dGPU power values are reported in milli Watts + * and STAPM power limits are in Watts */ + apu_power = metrics->ApuPower/1000; + apu_limit = metrics->StapmOpnLimit; + if (apu_power > apu_limit && apu_limit != 0) + apu_boost = ((apu_power - apu_limit) * 100) / apu_limit; + apu_boost = (apu_boost > 100) ? 100 : apu_boost; + + dgpu_power = metrics->dGpuPower/1000; + if (metrics->StapmCurrentLimit > metrics->StapmOpnLimit) + dgpu_limit = metrics->StapmCurrentLimit - metrics->StapmOpnLimit; + if (dgpu_power > dgpu_limit && dgpu_limit != 0) + dgpu_boost = ((dgpu_power - dgpu_limit) * 100) / dgpu_limit; + dgpu_boost = (dgpu_boost > 100) ? 
100 : dgpu_boost; + + if (dgpu_boost >= apu_boost) + apu_boost = 0; + else + dgpu_boost = 0; + + *apu_percent = apu_boost; + *dgpu_percent = dgpu_boost; + +} + static int yellow_carp_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) @@ -284,6 +323,8 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu, SmuMetrics_t *metrics = (SmuMetrics_t *)smu_table->metrics_table; int ret = 0; + uint32_t apu_percent = 0; + uint32_t dgpu_percent = 0; ret = smu_cmn_get_metrics_table(smu, NULL, false); if (ret) @@ -332,26 +373,18 @@ static int yellow_carp_get_smu_metrics_data(struct smu_context *smu, *value = metrics->Voltage[1]; break; case METRICS_SS_APU_SHARE: - /* return the percentage of APU power with respect to APU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of APU power boost + * with respect to APU's power limit. */ - if (metrics->StapmOpnLimit > 0) - *value = (metrics->ApuPower * 100) / metrics->StapmOpnLimit; - else - *value = 0; + yellow_carp_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = apu_percent; break; case METRICS_SS_DGPU_SHARE: - /* return the percentage of dGPU power with respect to dGPU's power limit. - * percentage is reported, this isn't boost value. Smartshift power - * boost/shift is only when the percentage is more than 100. + /* return the percentage of dGPU power boost + * with respect to dGPU's power limit. */ - if ((metrics->dGpuPower > 0) && - (metrics->StapmCurrentLimit > metrics->StapmOpnLimit)) - *value = (metrics->dGpuPower * 100) / - (metrics->StapmCurrentLimit - metrics->StapmOpnLimit); - else - *value = 0; + yellow_carp_get_ss_power_percent(metrics, &apu_percent, &dgpu_percent); + *value = dgpu_percent; break; default: *value = UINT_MAX; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c index d63d83800a8a3..517b94c3bcaf9 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c @@ -265,6 +265,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, layer->layer_type, &n_formats); + if (!formats) { + kfree(kplane); + return -ENOMEM; + } err = drm_universal_plane_init(&kms->base, plane, get_possible_crtcs(kms, c->pipeline), @@ -275,8 +279,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, komeda_put_fourcc_list(formats); - if (err) - goto cleanup; + if (err) { + kfree(kplane); + return err; + } drm_plane_helper_add(plane, &komeda_plane_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 494075ddbef68..b5928b52e2791 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -487,7 +487,10 @@ static void malidp_crtc_reset(struct drm_crtc *crtc) if (crtc->state) malidp_crtc_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &state->base); + if (state) + __drm_atomic_helper_crtc_reset(crtc, &state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 2145b08f95346..be2fc4791c1da 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -77,6 +77,8 @@ config DRM_DISPLAY_CONNECTOR config 
DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on OF + select DRM_DP_AUX_BUS + select DRM_DP_HELPER select DRM_KMS_HELPER select DRM_DP_HELPER select EXTCON diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 005bf18682ff8..668dcefbae17a 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1313,6 +1313,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) adv7511_audio_exit(adv7511); drm_bridge_remove(&adv7511->bridge); err_unregister_cec: + cec_unregister_adapter(adv7511->cec_adap); i2c_unregister_device(adv7511->i2c_cec); clk_disable_unprepare(adv7511->cec_clk); err_i2c_unregister_packet: diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index eb590fb8e8d0d..26d8fc3c18673 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1268,6 +1268,25 @@ static int analogix_dp_bridge_attach(struct drm_bridge *bridge, return 0; } +static +struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp, + struct drm_atomic_state *state) +{ + struct drm_encoder *encoder = dp->encoder; + struct drm_connector *connector; + struct drm_connector_state *conn_state; + + connector = drm_atomic_get_old_connector_for_encoder(state, encoder); + if (!connector) + return NULL; + + conn_state = drm_atomic_get_old_connector_state(state, connector); + if (!conn_state) + return NULL; + + return conn_state->crtc; +} + static struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp, struct drm_atomic_state *state) @@ -1448,14 +1467,16 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, { struct drm_atomic_state *old_state = old_bridge_state->base.state; struct analogix_dp_device *dp = bridge->driver_private; - struct drm_crtc *crtc; + struct drm_crtc *old_crtc, *new_crtc; + struct drm_crtc_state *old_crtc_state = NULL; struct drm_crtc_state *new_crtc_state = NULL; + int ret; - crtc = analogix_dp_get_new_crtc(dp, old_state); - if (!crtc) + new_crtc = analogix_dp_get_new_crtc(dp, old_state); + if (!new_crtc) goto out; - new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc); + new_crtc_state = drm_atomic_get_new_crtc_state(old_state, new_crtc); if (!new_crtc_state) goto out; @@ -1464,6 +1485,19 @@ analogix_dp_bridge_atomic_disable(struct drm_bridge *bridge, return; out: + old_crtc = analogix_dp_get_old_crtc(dp, old_state); + if (old_crtc) { + old_crtc_state = drm_atomic_get_old_crtc_state(old_state, + old_crtc); + + /* When moving from PSR to fully disabled, exit PSR first. 
*/ + if (old_crtc_state && old_crtc_state->self_refresh_active) { + ret = analogix_dp_disable_psr(dp); + if (ret) + DRM_ERROR("Failed to disable psr (%d)\n", ret); + } + } + analogix_dp_bridge_disable(bridge); } @@ -1632,8 +1666,19 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct analogix_dp_device *dp = to_dp(aux); + int ret; + + pm_runtime_get_sync(dp->dev); - return analogix_dp_transfer(dp, msg); + ret = analogix_dp_detect_hpd(dp); + if (ret) + goto out; + + ret = analogix_dp_transfer(dp, msg); +out: + pm_runtime_put(dp->dev); + + return ret; } struct analogix_dp_device * @@ -1698,8 +1743,10 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dp->reg_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dp->reg_base)) - return ERR_CAST(dp->reg_base); + if (IS_ERR(dp->reg_base)) { + ret = PTR_ERR(dp->reg_base); + goto err_disable_clk; + } dp->force_hpd = of_property_read_bool(dev->of_node, "force-hpd"); @@ -1711,7 +1758,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (IS_ERR(dp->hpd_gpiod)) { dev_err(dev, "error getting HDP GPIO: %ld\n", PTR_ERR(dp->hpd_gpiod)); - return ERR_CAST(dp->hpd_gpiod); + ret = PTR_ERR(dp->hpd_gpiod); + goto err_disable_clk; } if (dp->hpd_gpiod) { @@ -1731,7 +1779,8 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) if (dp->irq == -ENXIO) { dev_err(&pdev->dev, "failed to get irq\n"); - return ERR_PTR(-ENODEV); + ret = -ENODEV; + goto err_disable_clk; } ret = devm_request_threaded_irq(&pdev->dev, dp->irq, @@ -1740,11 +1789,15 @@ analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data) irq_flags, "analogix-dp", dp); if (ret) { dev_err(&pdev->dev, "failed to request irq\n"); - return ERR_PTR(ret); + goto err_disable_clk; } disable_irq(dp->irq); return dp; + +err_disable_clk: + clk_disable_unprepare(dp->clock); + return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(analogix_dp_probe); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 31ecf5626f1d9..060849f8ad8b4 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -874,7 +874,10 @@ static int anx7625_hdcp_enable(struct anx7625_data *ctx) } /* Read downstream capability */ - anx7625_aux_trans(ctx, DP_AUX_NATIVE_READ, 0x68028, 1, &bcap); + ret = anx7625_aux_trans(ctx, DP_AUX_NATIVE_READ, 0x68028, 1, &bcap); + if (ret < 0) + return ret; + if (!(bcap & 0x01)) { pr_warn("downstream not support HDCP 1.4, cap(%x).\n", bcap); return 0; @@ -1475,12 +1478,12 @@ static void anx7625_dp_adjust_swing(struct anx7625_data *ctx) for (i = 0; i < ctx->pdata.dp_lane0_swing_reg_cnt; i++) anx7625_reg_write(ctx, ctx->i2c.tx_p1_client, DP_TX_LANE0_SWING_REG0 + i, - ctx->pdata.lane0_reg_data[i] & 0xFF); + ctx->pdata.lane0_reg_data[i]); for (i = 0; i < ctx->pdata.dp_lane1_swing_reg_cnt; i++) anx7625_reg_write(ctx, ctx->i2c.tx_p1_client, DP_TX_LANE1_SWING_REG0 + i, - ctx->pdata.lane1_reg_data[i] & 0xFF); + ctx->pdata.lane1_reg_data[i]); } static void dp_hpd_change_handler(struct anx7625_data *ctx, bool on) @@ -1587,8 +1590,8 @@ static int anx7625_get_swing_setting(struct device *dev, num_regs = DP_TX_SWING_REG_CNT; pdata->dp_lane0_swing_reg_cnt = num_regs; - of_property_read_u32_array(dev->of_node, "analogix,lane0-swing", - pdata->lane0_reg_data, num_regs); + of_property_read_u8_array(dev->of_node, 
"analogix,lane0-swing", + pdata->lane0_reg_data, num_regs); } if (of_get_property(dev->of_node, @@ -1597,8 +1600,8 @@ static int anx7625_get_swing_setting(struct device *dev, num_regs = DP_TX_SWING_REG_CNT; pdata->dp_lane1_swing_reg_cnt = num_regs; - of_property_read_u32_array(dev->of_node, "analogix,lane1-swing", - pdata->lane1_reg_data, num_regs); + of_property_read_u8_array(dev->of_node, "analogix,lane1-swing", + pdata->lane1_reg_data, num_regs); } return 0; @@ -2654,7 +2657,7 @@ static int anx7625_i2c_probe(struct i2c_client *client, if (ret) { if (ret != -EPROBE_DEFER) DRM_DEV_ERROR(dev, "fail to parse DT : %d\n", ret); - return ret; + goto free_wq; } if (anx7625_register_i2c_dummy_clients(platform, client) != 0) { @@ -2669,7 +2672,7 @@ static int anx7625_i2c_probe(struct i2c_client *client, pm_suspend_ignore_children(dev, true); ret = devm_add_action_or_reset(dev, anx7625_runtime_disable, dev); if (ret) - return ret; + goto free_wq; if (!platform->pdata.low_power_mode) { anx7625_disable_pd_protocol(platform); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index edbbfe410a56e..e257a84db9626 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -426,9 +426,9 @@ struct anx7625_platform_data { int mipi_lanes; int audio_en; int dp_lane0_swing_reg_cnt; - int lane0_reg_data[DP_TX_SWING_REG_CNT]; + u8 lane0_reg_data[DP_TX_SWING_REG_CNT]; int dp_lane1_swing_reg_cnt; - int lane1_reg_data[DP_TX_SWING_REG_CNT]; + u8 lane1_reg_data[DP_TX_SWING_REG_CNT]; u32 low_power_mode; struct device_node *mipi_host_node; }; diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index d9b7f48b99fbf..c871a90c0b8f4 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -15,8 +15,19 @@ #include #include -#include